# Main dataset. Both "Unreported" and "NA" are read as missing, and up to
# 3000 rows are inspected when guessing column types.
d_import <- read_csv(
  file = "data/final_dataset.csv",
  na = c("Unreported", "NA"),
  guess_max = 3000
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   start_date = col_datetime(format = ""),
##   sample_size = col_double(),
##   Day_inferred = col_logical(),
##   Date_registration_format = col_datetime(format = "")
## )
## ℹ Use `spec()` for the full column specifications.
# Manually extracted 15% sample, needed for calculating contradictions
d_man <- read_xlsx(
  path = "data/manual_processing/manual_extraction/Manual_extraction_all.xlsx"
)
## New names:
## * `` -> ...36
# Keep the trial identifier plus the analysis variables (study_arm through
# analyst_blind) and turn every character column into a factor.
d <- d_import %>%
  select(TrialID, study_arm:analyst_blind) %>%
  mutate(across(where(is.character), as.factor))

# Convergence issues with source registry, so group infrequent categories:
# fct_lump_n() keeps the 6 most frequent registries and pools the rest into
# a single level ("Other" by default).
# The column is referenced by its bare name rather than `d$source_registry`,
# so it is always taken from the data flowing through the pipe.
d <- d %>%
  mutate(source_registry = fct_lump_n(source_registry, n = 6))

# For use in analysis 10: covid vs main, and covid vs indication-matched (im)
# subsets, each with a logical `covid` indicator column.
main_dataset_2 <- d[d$study_arm %in% c("covid", "main"), ]
main_dataset_2[["covid"]] <- main_dataset_2[["study_arm"]] == "covid"

indication_dataset_2 <- d[d$study_arm %in% c("covid", "im"), ]
indication_dataset_2[["covid"]] <- indication_dataset_2[["study_arm"]] == "covid"

1 Outlier check

# Histogram of the untransformed sample sizes in the imported data, used to
# look for outliers
hist(d_import$sample_size)

A manual check of some of the largest sample sizes indicates that they are accurate.

# Label variables for table1: display label keyed by column name
var_labels <- c(
  control_arm = "Control arm",
  randomisation = "Randomisation",
  blinding = "Blinding",
  prospective = "Prospective registration",
  source_registry = "Source registry",
  phase_clean = "Phase",
  region_Africa = "Africa",
  region_N_America = "North America",
  region_L_America = "Latin America",
  region_Asia = "Asia",
  region_Europe = "Europe",
  region_Oceania = "Oceania",
  multicentre = "Multicentre",
  primary_purpose = "Primary purpose",
  sponsor_type = "Sponsor type",
  sample_size = "Sample size",
  vaccine = "Vaccine",
  conventional = "Conventional",
  traditional = "Traditional",
  subject_blind = "Subject blind",
  caregiver_blind = "Caregiver blind",
  investigator_blind = "Investigator blind",
  outcome_blind = "Outcome blind",
  analyst_blind = "Analyst blind"
)

# Attach each label to its column, in the order listed above
for (var_name in names(var_labels)) {
  label(d[[var_name]]) <- var_labels[[var_name]]
}

# Drop manually extracted records flagged for exclusion. filter() keeps only
# rows where the condition is TRUE, so rows with a missing `Exclude` value are
# dropped as well.
d_man <- filter(d_man, Exclude != "Yes")

The following was used for quality control criterion #3.

# Counts of contradictions for each of the four outcomes in the manually
# extracted sample (missing values get their own category if any are present)
table(d_man$ContradictionControlArm, useNA = "ifany")
## 
##  No 
## 372
table(d_man$ContradictionRandomisation, useNA = "ifany")
## 
##  No Yes 
## 369   3
table(d_man$ContradictionBlinding, useNA = "ifany")
## 
##  No Yes 
## 364   8
table(d_man$ContradictionProspectiveRegistration, useNA = "ifany")
## 
##  No Yes 
## 361  11
# The same four tables expressed as percentages
prop.table(table(d_man$ContradictionControlArm, useNA = "ifany")) * 100
## 
##  No 
## 100
prop.table(table(d_man$ContradictionRandomisation, useNA = "ifany")) * 100
## 
##         No        Yes 
## 99.1935484  0.8064516
prop.table(table(d_man$ContradictionBlinding, useNA = "ifany")) * 100
## 
##        No       Yes 
## 97.849462  2.150538
prop.table(table(d_man$ContradictionProspectiveRegistration, useNA = "ifany")) * 100
## 
##        No       Yes 
## 97.043011  2.956989

2 Descriptive

This is what the data look like before changing any of the variables as per the protocol (e.g. before changing randomisation = Not applicable to randomisation = No). This is Table 4 in the paper.

# Make col headings nicer: map each study-arm code to its display name
# (any code not listed below becomes NA)
arm_headings <- c(covid = "Covid", im = "Indication matched", main = "Main")
d_tab <- d %>%
  mutate(study_arm = unname(arm_headings[as.character(study_arm)]))

# group together infrequent source registries for a nicer table
# d_tab <- d_tab %>% 
#   mutate(source_registry = fct_lump_n(d_tab$source_registry, n = 6))
  
# Descriptive table of all variables by study arm, with an "All" column.
# Continuous variables are summarised as mean (SD) and median [Q1, Q3].
table1(
  ~ control_arm + randomisation + blinding + prospective + source_registry +
    phase_clean + region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania + multicentre +
    primary_purpose + sponsor_type + sample_size + vaccine + conventional +
    traditional + subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind | study_arm,
  data = d_tab,
  overall = "All",
  render.continuous = c(. = "Mean (SD)", . = "Median [Q1, Q3]")
)
Covid
(N=818)
Indication matched
(N=839)
Main
(N=844)
All
(N=2501)
Control arm
No 105 (12.8%) 98 (11.7%) 211 (25.0%) 414 (16.6%)
Yes 713 (87.2%) 740 (88.2%) 632 (74.9%) 2085 (83.4%)
Missing 0 (0%) 1 (0.1%) 1 (0.1%) 2 (0.1%)
Randomisation
No 41 (5.0%) 45 (5.4%) 41 (4.9%) 127 (5.1%)
Not applicable 105 (12.8%) 98 (11.7%) 211 (25.0%) 414 (16.6%)
Yes 669 (81.8%) 693 (82.6%) 587 (69.5%) 1949 (77.9%)
Missing 3 (0.4%) 3 (0.4%) 5 (0.6%) 11 (0.4%)
Blinding
No 424 (51.8%) 296 (35.3%) 433 (51.3%) 1153 (46.1%)
Yes 366 (44.7%) 515 (61.4%) 387 (45.9%) 1268 (50.7%)
Missing 28 (3.4%) 28 (3.3%) 24 (2.8%) 80 (3.2%)
Prospective registration
No 259 (31.7%) 281 (33.5%) 222 (26.3%) 762 (30.5%)
Yes 559 (68.3%) 558 (66.5%) 622 (73.7%) 1739 (69.5%)
Source registry
ChiCTR 57 (7.0%) 42 (5.0%) 60 (7.1%) 159 (6.4%)
CT.gov 417 (51.0%) 455 (54.2%) 352 (41.7%) 1224 (48.9%)
CTRI 72 (8.8%) 27 (3.2%) 63 (7.5%) 162 (6.5%)
EUCTR 104 (12.7%) 145 (17.3%) 182 (21.6%) 431 (17.2%)
IRCT 109 (13.3%) 60 (7.2%) 58 (6.9%) 227 (9.1%)
JPRN 10 (1.2%) 45 (5.4%) 67 (7.9%) 122 (4.9%)
Other 49 (6.0%) 65 (7.7%) 62 (7.3%) 176 (7.0%)
Phase
Phase 1 85 (10.4%) 86 (10.3%) 109 (12.9%) 280 (11.2%)
Phase 2 293 (35.8%) 181 (21.6%) 250 (29.6%) 724 (28.9%)
Phase 3 280 (34.2%) 253 (30.2%) 235 (27.8%) 768 (30.7%)
Phase 4 61 (7.5%) 186 (22.2%) 118 (14.0%) 365 (14.6%)
Undefined 93 (11.4%) 111 (13.2%) 113 (13.4%) 317 (12.7%)
Missing 6 (0.7%) 22 (2.6%) 19 (2.3%) 47 (1.9%)
Africa
No 770 (94.1%) 771 (91.9%) 791 (93.7%) 2332 (93.2%)
Yes 48 (5.9%) 68 (8.1%) 53 (6.3%) 169 (6.8%)
North America
No 649 (79.3%) 582 (69.4%) 569 (67.4%) 1800 (72.0%)
Yes 169 (20.7%) 257 (30.6%) 275 (32.6%) 701 (28.0%)
Latin America
No 727 (88.9%) 760 (90.6%) 750 (88.9%) 2237 (89.4%)
Yes 91 (11.1%) 79 (9.4%) 94 (11.1%) 264 (10.6%)
Asia
No 458 (56.0%) 469 (55.9%) 360 (42.7%) 1287 (51.5%)
Yes 360 (44.0%) 370 (44.1%) 484 (57.3%) 1214 (48.5%)
Europe
No 596 (72.9%) 542 (64.6%) 540 (64.0%) 1678 (67.1%)
Yes 222 (27.1%) 297 (35.4%) 304 (36.0%) 823 (32.9%)
Oceania
No 802 (98.0%) 778 (92.7%) 759 (89.9%) 2339 (93.5%)
Yes 16 (2.0%) 61 (7.3%) 85 (10.1%) 162 (6.5%)
Multicentre
No 401 (49.0%) 411 (49.0%) 401 (47.5%) 1213 (48.5%)
Yes 373 (45.6%) 377 (44.9%) 384 (45.5%) 1134 (45.3%)
Missing 44 (5.4%) 51 (6.1%) 59 (7.0%) 154 (6.2%)
Primary purpose
Other 26 (3.2%) 101 (12.0%) 111 (13.2%) 238 (9.5%)
Prevention 121 (14.8%) 268 (31.9%) 88 (10.4%) 477 (19.1%)
Treatment 671 (82.0%) 470 (56.0%) 645 (76.4%) 1786 (71.4%)
Sponsor type
Industry 182 (22.2%) 261 (31.1%) 326 (38.6%) 769 (30.7%)
Investigator 52 (6.4%) 35 (4.2%) 42 (5.0%) 129 (5.2%)
Non industry 573 (70.0%) 536 (63.9%) 433 (51.3%) 1542 (61.7%)
Missing 11 (1.3%) 7 (0.8%) 43 (5.1%) 61 (2.4%)
Sample size
Mean (SD) 893 (5970) 723 (4820) 397 (4890) 668 (5250)
Median [Q1, Q3] 100 [50.0, 277] 120 [45.0, 308] 80.0 [40.0, 199] 100 [42.0, 260]
Missing 2 (0.2%) 2 (0.2%) 0 (0%) 4 (0.2%)
Vaccine
No 771 (94.3%) 595 (70.9%) 816 (96.7%) 2182 (87.2%)
Yes 47 (5.7%) 244 (29.1%) 28 (3.3%) 319 (12.8%)
Conventional
No 134 (16.4%) 273 (32.5%) 76 (9.0%) 483 (19.3%)
Yes 684 (83.6%) 566 (67.5%) 768 (91.0%) 2018 (80.7%)
Traditional
No 713 (87.2%) 776 (92.5%) 764 (90.5%) 2253 (90.1%)
Yes 105 (12.8%) 63 (7.5%) 80 (9.5%) 248 (9.9%)
Subject blind
No 424 (51.8%) 318 (37.9%) 416 (49.3%) 1158 (46.3%)
Yes 215 (26.3%) 268 (31.9%) 175 (20.7%) 658 (26.3%)
Missing 179 (21.9%) 253 (30.2%) 253 (30.0%) 685 (27.4%)
Caregiver blind
No 508 (62.1%) 420 (50.1%) 500 (59.2%) 1428 (57.1%)
Yes 131 (16.0%) 166 (19.8%) 87 (10.3%) 384 (15.4%)
Missing 179 (21.9%) 253 (30.2%) 257 (30.5%) 689 (27.5%)
Investigator blind
No 460 (56.2%) 330 (39.3%) 458 (54.3%) 1248 (49.9%)
Yes 179 (21.9%) 256 (30.5%) 133 (15.8%) 568 (22.7%)
Missing 179 (21.9%) 253 (30.2%) 253 (30.0%) 685 (27.4%)
Outcome blind
No 517 (63.2%) 401 (47.8%) 485 (57.5%) 1403 (56.1%)
Yes 122 (14.9%) 189 (22.5%) 103 (12.2%) 414 (16.6%)
Missing 179 (21.9%) 249 (29.7%) 256 (30.3%) 684 (27.3%)
Analyst blind
No 637 (77.9%) 583 (69.5%) 587 (69.5%) 1807 (72.3%)
Yes 2 (0.2%) 3 (0.4%) 0 (0%) 5 (0.2%)
Missing 179 (21.9%) 253 (30.2%) 257 (30.5%) 689 (27.5%)

We specified that we would treat unknown/unreported values in particular ways. We implement this here to generate the dataset for analysis.

# Randomisation: both "Not applicable" and missing are analysed as "No"
recode_to_no <- is.na(d$randomisation) | d$randomisation == "Not applicable"
d$randomisation[recode_to_no] <- "No"

# Blinding and multicentre: missing is analysed as "No".
# Control arm: likewise; this is a deviation agreed with the editor.
for (col_name in c("blinding", "multicentre", "control_arm")) {
  d[[col_name]][is.na(d[[col_name]])] <- "No"
}

# d$sample_size <- log(d$sample_size)
# to impute: phase, sample size (for only 3 trials) and sponsor type

3 Check log sample size

Below we checked the linearity in the logit assumption as per the preregistration. Generally log transformed sample size appeared to be better.

# check linearity in logit

# Work on a copy of `d` restricted to trials with a known sample size.
# Logical indexing is used instead of `-which(is.na(...))`, because the latter
# selects zero rows when no value is missing.
d_check <- d[!is.na(d$sample_size), ]

# Quintile boundaries of the raw sample size
quantile(d_check$sample_size, probs = seq(0, 1, 1/5), na.rm = TRUE)
##       0%      20%      40%      60%      80%     100% 
##      1.0     38.0     70.0    133.0    342.8 140000.0
# Assign each trial to a sample-size quintile ("1" = smallest, "5" = largest).
# The cut points are taken from the data rather than typed in by hand, so the
# groups stay correct if the dataset changes. Intervals are closed on the
# right, and the first group also includes the minimum.
# cut() stops with an error if two quintile boundaries coincide.
d_check <- d_check %>%
  mutate(
    quintile_group = as.character(
      cut(
        sample_size,
        breaks = quantile(sample_size, probs = seq(0, 1, 1/5), na.rm = TRUE),
        labels = 1:5,
        include.lowest = TRUE
      )
    )
  )

table(d_check$control_arm)
## 
##   No  Yes 
##  413 2084

# Recode the four binary outcomes from "No"/"Yes" to numeric 0/1 so that
# proportions can be computed from them. A lookup is applied to every outcome
# column in one pass; a missing or unexpected value becomes NA instead of
# stopping the script.
outcome_cols <- c("control_arm", "randomisation", "blinding", "prospective")
yes_no_codes <- c(No = 0, Yes = 1)
d_check[outcome_cols] <- lapply(
  d_check[outcome_cols],
  function(x) unname(yes_no_codes[as.character(x)])
)

# Per sample-size quintile: log odds of each 0/1 outcome, plus the median raw
# and median log sample size.
# Each log odds is computed directly from its own outcome (qlogis(p) is
# log(p / (1 - p))) instead of repeatedly overwriting a shared `prop` column.
# `prop` is still returned and, as before, holds the proportion of
# prospectively registered trials.
s <- d_check %>%
  group_by(quintile_group) %>%
  summarise(
    prop = mean(prospective),
    log_odds_control = qlogis(mean(control_arm)),
    log_odds_randomisation = qlogis(mean(randomisation)),
    log_odds_blinding = qlogis(mean(blinding)),
    log_odds_prospective = qlogis(prop),
    median = median(sample_size),
    median_log = median(log(sample_size))
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Log odds of each outcome against the median raw sample size of each quintile
plot(s$median, s$log_odds_control)

plot(s$median, s$log_odds_randomisation)

plot(s$median, s$log_odds_blinding)

plot(s$median, s$log_odds_prospective)

# The same four plots against the median log sample size of each quintile
plot(s$median_log, s$log_odds_control)

plot(s$median_log, s$log_odds_randomisation)

plot(s$median_log, s$log_odds_blinding)

plot(s$median_log, s$log_odds_prospective)

# Log sample size seems the better fit, so replace sample size with its
# natural log in the analysis dataset

d[["sample_size"]] <- log(d[["sample_size"]])

4 Summary of data

# Column-by-column summary of the analysis dataset
d %>% summary()
##                 TrialID     study_arm     start_date                 
##  ChiCTR1900024129   :   2   covid:818   Min.   :1999-12-31 00:00:00  
##  JPRN-jRCTs011180015:   2   im   :839   1st Qu.:2018-08-27 00:00:00  
##  NCT03814720        :   2   main :844   Median :2019-08-21 12:00:00  
##  NCT03982069        :   2               Mean   :2019-02-24 02:55:12  
##  NCT04157998        :   2               3rd Qu.:2020-04-27 00:00:00  
##  NCT04161339        :   2               Max.   :2021-08-01 00:00:00  
##  (Other)            :2489               NA's   :93                   
##  control_arm        randomisation  blinding   prospective source_registry
##  No : 416    No            : 552   No :1233   No : 762    ChiCTR: 159    
##  Yes:2085    Not applicable:   0   Yes:1268   Yes:1739    CT.gov:1224    
##              Yes           :1949                          CTRI  : 162    
##                                                           EUCTR : 431    
##                                                           IRCT  : 227    
##                                                           JPRN  : 122    
##                                                           Other : 176    
##     phase_clean  region_Africa region_N_America region_L_America region_Asia
##  Phase 1  :280   No :2332      No :1800         No :2237         No :1287   
##  Phase 2  :724   Yes: 169      Yes: 701         Yes: 264         Yes:1214   
##  Phase 3  :768                                                              
##  Phase 4  :365                                                              
##  Undefined:317                                                              
##  NA's     : 47                                                              
##                                                                             
##  region_Europe region_Oceania multicentre   primary_purpose       sponsor_type 
##  No :1678      No :2339       No :1367    Other     : 238   Industry    : 769  
##  Yes: 823      Yes: 162       Yes:1134    Prevention: 477   Investigator: 129  
##                                           Treatment :1786   Non industry:1542  
##                                                             NA's        :  61  
##                                                                                
##                                                                                
##                                                                                
##   sample_size     vaccine    conventional traditional subject_blind
##  Min.   : 0.000   No :2182   No : 483     No :2253    No  :1158    
##  1st Qu.: 3.738   Yes: 319   Yes:2018     Yes: 248    Yes : 658    
##  Median : 4.605                                       NA's: 685    
##  Mean   : 4.752                                                    
##  3rd Qu.: 5.561                                                    
##  Max.   :11.849                                                    
##  NA's   :4                                                         
##  caregiver_blind investigator_blind outcome_blind analyst_blind
##  No  :1428       No  :1248          No  :1403     No  :1807    
##  Yes : 384       Yes : 568          Yes : 414     Yes :   5    
##  NA's: 689       NA's: 685          NA's: 684     NA's: 689    
##                                                                
##                                                                
##                                                                
## 

5 Summary of missing data

# mice::md.pattern(d)
# Proportion of missing values per variable and the missingness patterns,
# with variables sorted by number of missings.
# `labels` is left at its default so the bars are labelled with the variable
# names of `d`. It was previously `names(data)`, but no object called `data`
# is defined in the visible script, so that presumably picked up the base
# function `data` and evaluated to NULL (TODO confirm).
VIM::aggr(
  d,
  numbers = TRUE,
  sortVars = TRUE,
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Proportion of missingness", "Missingness Pattern"),
  oma = c(10, 5, 5, 3)
)

## 
##  Variables sorted by number of missings: 
##            Variable      Count
##     caregiver_blind 0.27548980
##       analyst_blind 0.27548980
##       subject_blind 0.27389044
##  investigator_blind 0.27389044
##       outcome_blind 0.27349060
##          start_date 0.03718513
##        sponsor_type 0.02439024
##         phase_clean 0.01879248
##         sample_size 0.00159936
##             TrialID 0.00000000
##           study_arm 0.00000000
##         control_arm 0.00000000
##       randomisation 0.00000000
##            blinding 0.00000000
##         prospective 0.00000000
##     source_registry 0.00000000
##       region_Africa 0.00000000
##    region_N_America 0.00000000
##    region_L_America 0.00000000
##         region_Asia 0.00000000
##       region_Europe 0.00000000
##      region_Oceania 0.00000000
##         multicentre 0.00000000
##     primary_purpose 0.00000000
##             vaccine 0.00000000
##        conventional 0.00000000
##         traditional 0.00000000
# VIM::marginplot(d[,c(1, 2)])

6 Power analysis

We calculated power as follows. This code is retained here as some of the values are used later in the script.

# Inputs for the power analysis
total_alpha <- 0.05                     # overall alpha across all comparisons
n_comparisons <- 4                      # number of comparisons sharing it
alpha <- total_alpha / n_comparisons    # alpha per comparison
z <- qnorm(1 - alpha / 2)               # normal quantile at 1 - alpha / 2
power <- 0.95
p1 <- 0.5    # probability of outcome in COVID group
p2 <- 0.6    # probability of outcome in comparator group
prop <- 0.5  # proportion of sample in comparator group

# Power calculation: required sample size for the inputs defined above
sample_size <- SSizeLogisticBin(
  p1 = p1,
  p2 = p2,
  B = prop,
  alpha = alpha,
  power = power
)

The required sample size with power = 95% and an overall alpha of 0.05 split across 4 comparisons (0.0125 per comparison), to detect a 10 percentage-point difference between groups (0.6 vs 0.5) assuming a 0.5 probability of the outcome in the COVID-19 group, is 1693.

7 Analysis

7.1 Some more summaries

# Number of trials per study arm (missing values shown if any are present)
table(d[["study_arm"]], useNA = "ifany")
## 
## covid    im  main 
##   818   839   844

Generate main and indication-matched datasets:

# Covid vs main comparison set, with a logical `covid` indicator
main_dataset <- d[d$study_arm %in% c("covid", "main"), ]
main_dataset[["covid"]] <- main_dataset[["study_arm"]] == "covid"

# Covid vs indication-matched (im) comparison set, with the same indicator
indication_dataset <- d[d$study_arm %in% c("covid", "im"), ]
indication_dataset[["covid"]] <- indication_dataset[["study_arm"]] == "covid"

7.2 Tables of each outcome by other variables

Note that these have log sample size.

7.2.1 Control arm

# All variables, stratified by study arm and control arm (No/Yes)
table1::table1(
  ~ control_arm + randomisation + blinding + prospective +
    source_registry + phase_clean +
    region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania +
    multicentre + primary_purpose + sponsor_type + sample_size +
    vaccine + conventional + traditional +
    subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind
  | study_arm * control_arm,
  data = d
)
covid
im
main
Overall
No
(N=105)
Yes
(N=713)
No
(N=99)
Yes
(N=740)
No
(N=212)
Yes
(N=632)
No
(N=416)
Yes
(N=2085)
Control arm
No 105 (100%) 0 (0%) 99 (100%) 0 (0%) 212 (100%) 0 (0%) 416 (100%) 0 (0%)
Yes 0 (0%) 713 (100%) 0 (0%) 740 (100%) 0 (0%) 632 (100%) 0 (0%) 2085 (100%)
Randomisation
No 105 (100%) 44 (6.2%) 99 (100%) 47 (6.4%) 212 (100%) 45 (7.1%) 416 (100%) 136 (6.5%)
Not applicable 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Yes 0 (0%) 669 (93.8%) 0 (0%) 693 (93.6%) 0 (0%) 587 (92.9%) 0 (0%) 1949 (93.5%)
Blinding
No 105 (100%) 347 (48.7%) 98 (99.0%) 226 (30.5%) 212 (100%) 245 (38.8%) 415 (99.8%) 818 (39.2%)
Yes 0 (0%) 366 (51.3%) 1 (1.0%) 514 (69.5%) 0 (0%) 387 (61.2%) 1 (0.2%) 1267 (60.8%)
Prospective registration
No 38 (36.2%) 221 (31.0%) 37 (37.4%) 244 (33.0%) 58 (27.4%) 164 (25.9%) 133 (32.0%) 629 (30.2%)
Yes 67 (63.8%) 492 (69.0%) 62 (62.6%) 496 (67.0%) 154 (72.6%) 468 (74.1%) 283 (68.0%) 1456 (69.8%)
Source registry
ChiCTR 5 (4.8%) 52 (7.3%) 2 (2.0%) 40 (5.4%) 2 (0.9%) 58 (9.2%) 9 (2.2%) 150 (7.2%)
CT.gov 55 (52.4%) 362 (50.8%) 49 (49.5%) 406 (54.9%) 95 (44.8%) 257 (40.7%) 199 (47.8%) 1025 (49.2%)
CTRI 12 (11.4%) 60 (8.4%) 0 (0%) 27 (3.6%) 12 (5.7%) 51 (8.1%) 24 (5.8%) 138 (6.6%)
EUCTR 11 (10.5%) 93 (13.0%) 20 (20.2%) 125 (16.9%) 57 (26.9%) 125 (19.8%) 88 (21.2%) 343 (16.5%)
IRCT 10 (9.5%) 99 (13.9%) 1 (1.0%) 59 (8.0%) 3 (1.4%) 55 (8.7%) 14 (3.4%) 213 (10.2%)
JPRN 4 (3.8%) 6 (0.8%) 20 (20.2%) 25 (3.4%) 27 (12.7%) 40 (6.3%) 51 (12.3%) 71 (3.4%)
Other 8 (7.6%) 41 (5.8%) 7 (7.1%) 58 (7.8%) 16 (7.5%) 46 (7.3%) 31 (7.5%) 145 (7.0%)
Phase
Phase 1 23 (21.9%) 62 (8.7%) 18 (18.2%) 68 (9.2%) 28 (13.2%) 81 (12.8%) 69 (16.6%) 211 (10.1%)
Phase 2 35 (33.3%) 258 (36.2%) 15 (15.2%) 166 (22.4%) 115 (54.2%) 135 (21.4%) 165 (39.7%) 559 (26.8%)
Phase 3 16 (15.2%) 264 (37.0%) 13 (13.1%) 240 (32.4%) 24 (11.3%) 211 (33.4%) 53 (12.7%) 715 (34.3%)
Phase 4 3 (2.9%) 58 (8.1%) 31 (31.3%) 155 (20.9%) 21 (9.9%) 97 (15.3%) 55 (13.2%) 310 (14.9%)
Undefined 27 (25.7%) 66 (9.3%) 12 (12.1%) 99 (13.4%) 13 (6.1%) 100 (15.8%) 52 (12.5%) 265 (12.7%)
Missing 1 (1.0%) 5 (0.7%) 10 (10.1%) 12 (1.6%) 11 (5.2%) 8 (1.3%) 22 (5.3%) 25 (1.2%)
Africa
No 103 (98.1%) 667 (93.5%) 95 (96.0%) 676 (91.4%) 206 (97.2%) 585 (92.6%) 404 (97.1%) 1928 (92.5%)
Yes 2 (1.9%) 46 (6.5%) 4 (4.0%) 64 (8.6%) 6 (2.8%) 47 (7.4%) 12 (2.9%) 157 (7.5%)
North America
No 78 (74.3%) 571 (80.1%) 71 (71.7%) 511 (69.1%) 134 (63.2%) 435 (68.8%) 283 (68.0%) 1517 (72.8%)
Yes 27 (25.7%) 142 (19.9%) 28 (28.3%) 229 (30.9%) 78 (36.8%) 197 (31.2%) 133 (32.0%) 568 (27.2%)
Latin America
No 93 (88.6%) 634 (88.9%) 96 (97.0%) 664 (89.7%) 200 (94.3%) 550 (87.0%) 389 (93.5%) 1848 (88.6%)
Yes 12 (11.4%) 79 (11.1%) 3 (3.0%) 76 (10.3%) 12 (5.7%) 82 (13.0%) 27 (6.5%) 237 (11.4%)
Asia
No 65 (61.9%) 393 (55.1%) 53 (53.5%) 416 (56.2%) 99 (46.7%) 261 (41.3%) 217 (52.2%) 1070 (51.3%)
Yes 40 (38.1%) 320 (44.9%) 46 (46.5%) 324 (43.8%) 113 (53.3%) 371 (58.7%) 199 (47.8%) 1015 (48.7%)
Europe
No 79 (75.2%) 517 (72.5%) 66 (66.7%) 476 (64.3%) 129 (60.8%) 411 (65.0%) 274 (65.9%) 1404 (67.3%)
Yes 26 (24.8%) 196 (27.5%) 33 (33.3%) 264 (35.7%) 83 (39.2%) 221 (35.0%) 142 (34.1%) 681 (32.7%)
Oceania
No 104 (99.0%) 698 (97.9%) 94 (94.9%) 684 (92.4%) 192 (90.6%) 567 (89.7%) 390 (93.8%) 1949 (93.5%)
Yes 1 (1.0%) 15 (2.1%) 5 (5.1%) 56 (7.6%) 20 (9.4%) 65 (10.3%) 26 (6.3%) 136 (6.5%)
Multicentre
No 71 (67.6%) 374 (52.5%) 68 (68.7%) 394 (53.2%) 102 (48.1%) 358 (56.6%) 241 (57.9%) 1126 (54.0%)
Yes 34 (32.4%) 339 (47.5%) 31 (31.3%) 346 (46.8%) 110 (51.9%) 274 (43.4%) 175 (42.1%) 959 (46.0%)
Primary purpose
Other 5 (4.8%) 21 (2.9%) 28 (28.3%) 73 (9.9%) 17 (8.0%) 94 (14.9%) 50 (12.0%) 188 (9.0%)
Prevention 14 (13.3%) 107 (15.0%) 27 (27.3%) 241 (32.6%) 10 (4.7%) 78 (12.3%) 51 (12.3%) 426 (20.4%)
Treatment 86 (81.9%) 585 (82.0%) 44 (44.4%) 426 (57.6%) 185 (87.3%) 460 (72.8%) 315 (75.7%) 1471 (70.6%)
Sponsor type
Industry 20 (19.0%) 162 (22.7%) 29 (29.3%) 232 (31.4%) 89 (42.0%) 237 (37.5%) 138 (33.2%) 631 (30.3%)
Investigator 9 (8.6%) 43 (6.0%) 3 (3.0%) 32 (4.3%) 12 (5.7%) 30 (4.7%) 24 (5.8%) 105 (5.0%)
Non industry 72 (68.6%) 501 (70.3%) 64 (64.6%) 472 (63.8%) 94 (44.3%) 339 (53.6%) 230 (55.3%) 1312 (62.9%)
Missing 4 (3.8%) 7 (1.0%) 3 (3.0%) 4 (0.5%) 17 (8.0%) 26 (4.1%) 24 (5.8%) 37 (1.8%)
Sample size
Mean (SD) 3.88 (1.36) 4.99 (1.44) 3.81 (1.35) 5.05 (1.49) 3.76 (1.13) 4.76 (1.21) 3.80 (1.25) 4.94 (1.40)
Median [Min, Max] 3.69 [1.79, 11.8] 4.61 [0, 11.0] 3.47 [0, 7.60] 4.86 [0, 11.7] 3.69 [0.693, 7.58] 4.61 [0, 11.8] 3.66 [0, 11.8] 4.70 [0, 11.8]
Missing 2 (1.9%) 0 (0%) 1 (1.0%) 1 (0.1%) 0 (0%) 0 (0%) 3 (0.7%) 1 (0.0%)
Vaccine
No 100 (95.2%) 671 (94.1%) 64 (64.6%) 531 (71.8%) 203 (95.8%) 613 (97.0%) 367 (88.2%) 1815 (87.1%)
Yes 5 (4.8%) 42 (5.9%) 35 (35.4%) 209 (28.2%) 9 (4.2%) 19 (3.0%) 49 (11.8%) 270 (12.9%)
Conventional
No 14 (13.3%) 120 (16.8%) 35 (35.4%) 238 (32.2%) 12 (5.7%) 64 (10.1%) 61 (14.7%) 422 (20.2%)
Yes 91 (86.7%) 593 (83.2%) 64 (64.6%) 502 (67.8%) 200 (94.3%) 568 (89.9%) 355 (85.3%) 1663 (79.8%)
Traditional
No 96 (91.4%) 617 (86.5%) 99 (100%) 677 (91.5%) 205 (96.7%) 559 (88.4%) 400 (96.2%) 1853 (88.9%)
Yes 9 (8.6%) 96 (13.5%) 0 (0%) 63 (8.5%) 7 (3.3%) 73 (11.6%) 16 (3.8%) 232 (11.1%)
Subject blind
No 96 (91.4%) 328 (46.0%) 80 (80.8%) 238 (32.2%) 198 (93.4%) 218 (34.5%) 374 (89.9%) 784 (37.6%)
Yes 1 (1.0%) 214 (30.0%) 0 (0%) 268 (36.2%) 0 (0%) 175 (27.7%) 1 (0.2%) 657 (31.5%)
Missing 8 (7.6%) 171 (24.0%) 19 (19.2%) 234 (31.6%) 14 (6.6%) 239 (37.8%) 41 (9.9%) 644 (30.9%)
Caregiver blind
No 96 (91.4%) 412 (57.8%) 80 (80.8%) 340 (45.9%) 198 (93.4%) 302 (47.8%) 374 (89.9%) 1054 (50.6%)
Yes 1 (1.0%) 130 (18.2%) 0 (0%) 166 (22.4%) 0 (0%) 87 (13.8%) 1 (0.2%) 383 (18.4%)
Missing 8 (7.6%) 171 (24.0%) 19 (19.2%) 234 (31.6%) 14 (6.6%) 243 (38.4%) 41 (9.9%) 648 (31.1%)
Investigator blind
No 96 (91.4%) 364 (51.1%) 80 (80.8%) 250 (33.8%) 198 (93.4%) 260 (41.1%) 374 (89.9%) 874 (41.9%)
Yes 1 (1.0%) 178 (25.0%) 0 (0%) 256 (34.6%) 0 (0%) 133 (21.0%) 1 (0.2%) 567 (27.2%)
Missing 8 (7.6%) 171 (24.0%) 19 (19.2%) 234 (31.6%) 14 (6.6%) 239 (37.8%) 41 (9.9%) 644 (30.9%)
Outcome blind
No 96 (91.4%) 421 (59.0%) 80 (80.8%) 321 (43.4%) 198 (93.4%) 287 (45.4%) 374 (89.9%) 1029 (49.4%)
Yes 1 (1.0%) 121 (17.0%) 1 (1.0%) 188 (25.4%) 0 (0%) 103 (16.3%) 2 (0.5%) 412 (19.8%)
Missing 8 (7.6%) 171 (24.0%) 18 (18.2%) 231 (31.2%) 14 (6.6%) 242 (38.3%) 40 (9.6%) 644 (30.9%)
Analyst blind
No 97 (92.4%) 540 (75.7%) 80 (80.8%) 503 (68.0%) 198 (93.4%) 389 (61.6%) 375 (90.1%) 1432 (68.7%)
Yes 0 (0%) 2 (0.3%) 0 (0%) 3 (0.4%) 0 (0%) 0 (0%) 0 (0%) 5 (0.2%)
Missing 8 (7.6%) 171 (24.0%) 19 (19.2%) 234 (31.6%) 14 (6.6%) 243 (38.4%) 41 (9.9%) 648 (31.1%)

7.2.2 Randomisation

# All variables, stratified by study arm and randomisation (No/Yes)
table1::table1(
  ~ control_arm + randomisation + blinding + prospective +
    source_registry + phase_clean +
    region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania +
    multicentre + primary_purpose + sponsor_type + sample_size +
    vaccine + conventional + traditional +
    subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind
  | study_arm * randomisation,
  data = d
)
covid
im
main
Overall
No
(N=149)
Yes
(N=669)
No
(N=146)
Yes
(N=693)
No
(N=257)
Yes
(N=587)
No
(N=552)
Yes
(N=1949)
Control arm
No 105 (70.5%) 0 (0%) 99 (67.8%) 0 (0%) 212 (82.5%) 0 (0%) 416 (75.4%) 0 (0%)
Yes 44 (29.5%) 669 (100%) 47 (32.2%) 693 (100%) 45 (17.5%) 587 (100%) 136 (24.6%) 1949 (100%)
Randomisation
No 149 (100%) 0 (0%) 146 (100%) 0 (0%) 257 (100%) 0 (0%) 552 (100%) 0 (0%)
Not applicable 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Yes 0 (0%) 669 (100%) 0 (0%) 693 (100%) 0 (0%) 587 (100%) 0 (0%) 1949 (100%)
Blinding
No 147 (98.7%) 305 (45.6%) 134 (91.8%) 190 (27.4%) 254 (98.8%) 203 (34.6%) 535 (96.9%) 698 (35.8%)
Yes 2 (1.3%) 364 (54.4%) 12 (8.2%) 503 (72.6%) 3 (1.2%) 384 (65.4%) 17 (3.1%) 1251 (64.2%)
Prospective registration
No 57 (38.3%) 202 (30.2%) 58 (39.7%) 223 (32.2%) 69 (26.8%) 153 (26.1%) 184 (33.3%) 578 (29.7%)
Yes 92 (61.7%) 467 (69.8%) 88 (60.3%) 470 (67.8%) 188 (73.2%) 434 (73.9%) 368 (66.7%) 1371 (70.3%)
Source registry
ChiCTR 11 (7.4%) 46 (6.9%) 5 (3.4%) 37 (5.3%) 3 (1.2%) 57 (9.7%) 19 (3.4%) 140 (7.2%)
CT.gov 78 (52.3%) 339 (50.7%) 83 (56.8%) 372 (53.7%) 122 (47.5%) 230 (39.2%) 283 (51.3%) 941 (48.3%)
CTRI 15 (10.1%) 57 (8.5%) 0 (0%) 27 (3.9%) 15 (5.8%) 48 (8.2%) 30 (5.4%) 132 (6.8%)
EUCTR 12 (8.1%) 92 (13.8%) 21 (14.4%) 124 (17.9%) 62 (24.1%) 120 (20.4%) 95 (17.2%) 336 (17.2%)
IRCT 17 (11.4%) 92 (13.8%) 6 (4.1%) 54 (7.8%) 8 (3.1%) 50 (8.5%) 31 (5.6%) 196 (10.1%)
JPRN 4 (2.7%) 6 (0.9%) 22 (15.1%) 23 (3.3%) 30 (11.7%) 37 (6.3%) 56 (10.1%) 66 (3.4%)
Other 12 (8.1%) 37 (5.5%) 9 (6.2%) 56 (8.1%) 17 (6.6%) 45 (7.7%) 38 (6.9%) 138 (7.1%)
Phase
Phase 1 33 (22.1%) 52 (7.8%) 26 (17.8%) 60 (8.7%) 36 (14.0%) 73 (12.4%) 95 (17.2%) 185 (9.5%)
Phase 2 51 (34.2%) 242 (36.2%) 19 (13.0%) 162 (23.4%) 135 (52.5%) 115 (19.6%) 205 (37.1%) 519 (26.6%)
Phase 3 23 (15.4%) 257 (38.4%) 22 (15.1%) 231 (33.3%) 31 (12.1%) 204 (34.8%) 76 (13.8%) 692 (35.5%)
Phase 4 4 (2.7%) 57 (8.5%) 50 (34.2%) 136 (19.6%) 23 (8.9%) 95 (16.2%) 77 (13.9%) 288 (14.8%)
Undefined 34 (22.8%) 59 (8.8%) 18 (12.3%) 93 (13.4%) 19 (7.4%) 94 (16.0%) 71 (12.9%) 246 (12.6%)
Missing 4 (2.7%) 2 (0.3%) 11 (7.5%) 11 (1.6%) 13 (5.1%) 6 (1.0%) 28 (5.1%) 19 (1.0%)
Africa
No 146 (98.0%) 624 (93.3%) 142 (97.3%) 629 (90.8%) 247 (96.1%) 544 (92.7%) 535 (96.9%) 1797 (92.2%)
Yes 3 (2.0%) 45 (6.7%) 4 (2.7%) 64 (9.2%) 10 (3.9%) 43 (7.3%) 17 (3.1%) 152 (7.8%)
North America
No 111 (74.5%) 538 (80.4%) 97 (66.4%) 485 (70.0%) 160 (62.3%) 409 (69.7%) 368 (66.7%) 1432 (73.5%)
Yes 38 (25.5%) 131 (19.6%) 49 (33.6%) 208 (30.0%) 97 (37.7%) 178 (30.3%) 184 (33.3%) 517 (26.5%)
Latin America
No 135 (90.6%) 592 (88.5%) 142 (97.3%) 618 (89.2%) 245 (95.3%) 505 (86.0%) 522 (94.6%) 1715 (88.0%)
Yes 14 (9.4%) 77 (11.5%) 4 (2.7%) 75 (10.8%) 12 (4.7%) 82 (14.0%) 30 (5.4%) 234 (12.0%)
Asia
No 89 (59.7%) 369 (55.2%) 83 (56.8%) 386 (55.7%) 125 (48.6%) 235 (40.0%) 297 (53.8%) 990 (50.8%)
Yes 60 (40.3%) 300 (44.8%) 63 (43.2%) 307 (44.3%) 132 (51.4%) 352 (60.0%) 255 (46.2%) 959 (49.2%)
Europe
No 111 (74.5%) 485 (72.5%) 104 (71.2%) 438 (63.2%) 158 (61.5%) 382 (65.1%) 373 (67.6%) 1305 (67.0%)
Yes 38 (25.5%) 184 (27.5%) 42 (28.8%) 255 (36.8%) 99 (38.5%) 205 (34.9%) 179 (32.4%) 644 (33.0%)
Oceania
No 147 (98.7%) 655 (97.9%) 140 (95.9%) 638 (92.1%) 234 (91.1%) 525 (89.4%) 521 (94.4%) 1818 (93.3%)
Yes 2 (1.3%) 14 (2.1%) 6 (4.1%) 55 (7.9%) 23 (8.9%) 62 (10.6%) 31 (5.6%) 131 (6.7%)
Multicentre
No 105 (70.5%) 340 (50.8%) 104 (71.2%) 358 (51.7%) 127 (49.4%) 333 (56.7%) 336 (60.9%) 1031 (52.9%)
Yes 44 (29.5%) 329 (49.2%) 42 (28.8%) 335 (48.3%) 130 (50.6%) 254 (43.3%) 216 (39.1%) 918 (47.1%)
Primary purpose
Other 8 (5.4%) 18 (2.7%) 42 (28.8%) 59 (8.5%) 25 (9.7%) 86 (14.7%) 75 (13.6%) 163 (8.4%)
Prevention 26 (17.4%) 95 (14.2%) 44 (30.1%) 224 (32.3%) 13 (5.1%) 75 (12.8%) 83 (15.0%) 394 (20.2%)
Treatment 115 (77.2%) 556 (83.1%) 60 (41.1%) 410 (59.2%) 219 (85.2%) 426 (72.6%) 394 (71.4%) 1392 (71.4%)
Sponsor type
Industry 27 (18.1%) 155 (23.2%) 37 (25.3%) 224 (32.3%) 108 (42.0%) 218 (37.1%) 172 (31.2%) 597 (30.6%)
Investigator 11 (7.4%) 41 (6.1%) 4 (2.7%) 31 (4.5%) 14 (5.4%) 28 (4.8%) 29 (5.3%) 100 (5.1%)
Non industry 107 (71.8%) 466 (69.7%) 102 (69.9%) 434 (62.6%) 118 (45.9%) 315 (53.7%) 327 (59.2%) 1215 (62.3%)
Missing 4 (2.7%) 7 (1.0%) 3 (2.1%) 4 (0.6%) 17 (6.6%) 26 (4.4%) 24 (4.3%) 37 (1.9%)
Sample size
Mean (SD) 3.94 (1.38) 5.05 (1.42) 4.00 (1.31) 5.09 (1.51) 3.81 (1.15) 4.81 (1.19) 3.90 (1.26) 4.99 (1.39)
Median [Min, Max] 3.69 [0, 11.8] 4.68 [1.95, 11.0] 3.69 [0, 7.60] 4.92 [0, 11.7] 3.69 [0.693, 8.22] 4.61 [0, 11.8] 3.69 [0, 11.8] 4.79 [0, 11.8]
Missing 2 (1.3%) 0 (0%) 1 (0.7%) 1 (0.1%) 0 (0%) 0 (0%) 3 (0.5%) 1 (0.1%)
Vaccine
No 138 (92.6%) 633 (94.6%) 84 (57.5%) 511 (73.7%) 245 (95.3%) 571 (97.3%) 467 (84.6%) 1715 (88.0%)
Yes 11 (7.4%) 36 (5.4%) 62 (42.5%) 182 (26.3%) 12 (4.7%) 16 (2.7%) 85 (15.4%) 234 (12.0%)
Conventional
No 23 (15.4%) 111 (16.6%) 62 (42.5%) 211 (30.4%) 16 (6.2%) 60 (10.2%) 101 (18.3%) 382 (19.6%)
Yes 126 (84.6%) 558 (83.4%) 84 (57.5%) 482 (69.6%) 241 (93.8%) 527 (89.8%) 451 (81.7%) 1567 (80.4%)
Traditional
No 135 (90.6%) 578 (86.4%) 146 (100%) 630 (90.9%) 247 (96.1%) 517 (88.1%) 528 (95.7%) 1725 (88.5%)
Yes 14 (9.4%) 91 (13.6%) 0 (0%) 63 (9.1%) 10 (3.9%) 70 (11.9%) 24 (4.3%) 224 (11.5%)
Subject blind
No 133 (89.3%) 291 (43.5%) 116 (79.5%) 202 (29.1%) 236 (91.8%) 180 (30.7%) 485 (87.9%) 673 (34.5%)
Yes 1 (0.7%) 214 (32.0%) 1 (0.7%) 267 (38.5%) 0 (0%) 175 (29.8%) 2 (0.4%) 656 (33.7%)
Missing 15 (10.1%) 164 (24.5%) 29 (19.9%) 224 (32.3%) 21 (8.2%) 232 (39.5%) 65 (11.8%) 620 (31.8%)
Caregiver blind
No 133 (89.3%) 375 (56.1%) 117 (80.1%) 303 (43.7%) 236 (91.8%) 264 (45.0%) 486 (88.0%) 942 (48.3%)
Yes 1 (0.7%) 130 (19.4%) 0 (0%) 166 (24.0%) 0 (0%) 87 (14.8%) 1 (0.2%) 383 (19.7%)
Missing 15 (10.1%) 164 (24.5%) 29 (19.9%) 224 (32.3%) 21 (8.2%) 236 (40.2%) 65 (11.8%) 624 (32.0%)
Investigator blind
No 133 (89.3%) 327 (48.9%) 117 (80.1%) 213 (30.7%) 236 (91.8%) 222 (37.8%) 486 (88.0%) 762 (39.1%)
Yes 1 (0.7%) 178 (26.6%) 0 (0%) 256 (36.9%) 0 (0%) 133 (22.7%) 1 (0.2%) 567 (29.1%)
Missing 15 (10.1%) 164 (24.5%) 29 (19.9%) 224 (32.3%) 21 (8.2%) 232 (39.5%) 65 (11.8%) 620 (31.8%)
Outcome blind
No 132 (88.6%) 385 (57.5%) 115 (78.8%) 286 (41.3%) 236 (91.8%) 249 (42.4%) 483 (87.5%) 920 (47.2%)
Yes 2 (1.3%) 120 (17.9%) 5 (3.4%) 184 (26.6%) 0 (0%) 103 (17.5%) 7 (1.3%) 407 (20.9%)
Missing 15 (10.1%) 164 (24.5%) 26 (17.8%) 223 (32.2%) 21 (8.2%) 235 (40.0%) 62 (11.2%) 622 (31.9%)
Analyst blind
No 134 (89.9%) 503 (75.2%) 117 (80.1%) 466 (67.2%) 236 (91.8%) 351 (59.8%) 487 (88.2%) 1320 (67.7%)
Yes 0 (0%) 2 (0.3%) 0 (0%) 3 (0.4%) 0 (0%) 0 (0%) 0 (0%) 5 (0.3%)
Missing 15 (10.1%) 164 (24.5%) 29 (19.9%) 224 (32.3%) 21 (8.2%) 236 (40.2%) 65 (11.8%) 624 (32.0%)

7.2.3 Blinding

# Characteristics table with columns split by study arm and, within each arm,
# by blinding status. `blinding` is also listed as a row variable, so its own
# rows are 100% / 0% by construction.
table1::table1(
  ~ control_arm + randomisation + blinding + prospective + source_registry +
    phase_clean + region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania + multicentre +
    primary_purpose + sponsor_type + sample_size + vaccine + conventional +
    traditional + subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind | study_arm * blinding,
  data = d
)
covid
im
main
Overall
No
(N=452)
Yes
(N=366)
No
(N=324)
Yes
(N=515)
No
(N=457)
Yes
(N=387)
No
(N=1233)
Yes
(N=1268)
Control arm
No 105 (23.2%) 0 (0%) 98 (30.2%) 1 (0.2%) 212 (46.4%) 0 (0%) 415 (33.7%) 1 (0.1%)
Yes 347 (76.8%) 366 (100%) 226 (69.8%) 514 (99.8%) 245 (53.6%) 387 (100%) 818 (66.3%) 1267 (99.9%)
Randomisation
No 147 (32.5%) 2 (0.5%) 134 (41.4%) 12 (2.3%) 254 (55.6%) 3 (0.8%) 535 (43.4%) 17 (1.3%)
Not applicable 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Yes 305 (67.5%) 364 (99.5%) 190 (58.6%) 503 (97.7%) 203 (44.4%) 384 (99.2%) 698 (56.6%) 1251 (98.7%)
Blinding
No 452 (100%) 0 (0%) 324 (100%) 0 (0%) 457 (100%) 0 (0%) 1233 (100%) 0 (0%)
Yes 0 (0%) 366 (100%) 0 (0%) 515 (100%) 0 (0%) 387 (100%) 0 (0%) 1268 (100%)
Prospective registration
No 161 (35.6%) 98 (26.8%) 115 (35.5%) 166 (32.2%) 124 (27.1%) 98 (25.3%) 400 (32.4%) 362 (28.5%)
Yes 291 (64.4%) 268 (73.2%) 209 (64.5%) 349 (67.8%) 333 (72.9%) 289 (74.7%) 833 (67.6%) 906 (71.5%)
Source registry
ChiCTR 46 (10.2%) 11 (3.0%) 27 (8.3%) 15 (2.9%) 35 (7.7%) 25 (6.5%) 108 (8.8%) 51 (4.0%)
CT.gov 205 (45.4%) 212 (57.9%) 156 (48.1%) 299 (58.1%) 197 (43.1%) 155 (40.1%) 558 (45.3%) 666 (52.5%)
CTRI 54 (11.9%) 18 (4.9%) 11 (3.4%) 16 (3.1%) 30 (6.6%) 33 (8.5%) 95 (7.7%) 67 (5.3%)
EUCTR 54 (11.9%) 50 (13.7%) 51 (15.7%) 94 (18.3%) 97 (21.2%) 85 (22.0%) 202 (16.4%) 229 (18.1%)
IRCT 58 (12.8%) 51 (13.9%) 21 (6.5%) 39 (7.6%) 16 (3.5%) 42 (10.9%) 95 (7.7%) 132 (10.4%)
JPRN 7 (1.5%) 3 (0.8%) 30 (9.3%) 15 (2.9%) 52 (11.4%) 15 (3.9%) 89 (7.2%) 33 (2.6%)
Other 28 (6.2%) 21 (5.7%) 28 (8.6%) 37 (7.2%) 30 (6.6%) 32 (8.3%) 86 (7.0%) 90 (7.1%)
Phase
Phase 1 53 (11.7%) 32 (8.7%) 44 (13.6%) 42 (8.2%) 69 (15.1%) 40 (10.3%) 166 (13.5%) 114 (9.0%)
Phase 2 154 (34.1%) 139 (38.0%) 41 (12.7%) 140 (27.2%) 166 (36.3%) 84 (21.7%) 361 (29.3%) 363 (28.6%)
Phase 3 134 (29.6%) 146 (39.9%) 72 (22.2%) 181 (35.1%) 95 (20.8%) 140 (36.2%) 301 (24.4%) 467 (36.8%)
Phase 4 43 (9.5%) 18 (4.9%) 96 (29.6%) 90 (17.5%) 67 (14.7%) 51 (13.2%) 206 (16.7%) 159 (12.5%)
Undefined 65 (14.4%) 28 (7.7%) 54 (16.7%) 57 (11.1%) 42 (9.2%) 71 (18.3%) 161 (13.1%) 156 (12.3%)
Missing 3 (0.7%) 3 (0.8%) 17 (5.2%) 5 (1.0%) 18 (3.9%) 1 (0.3%) 38 (3.1%) 9 (0.7%)
Africa
No 431 (95.4%) 339 (92.6%) 310 (95.7%) 461 (89.5%) 436 (95.4%) 355 (91.7%) 1177 (95.5%) 1155 (91.1%)
Yes 21 (4.6%) 27 (7.4%) 14 (4.3%) 54 (10.5%) 21 (4.6%) 32 (8.3%) 56 (4.5%) 113 (8.9%)
North America
No 378 (83.6%) 271 (74.0%) 231 (71.3%) 351 (68.2%) 313 (68.5%) 256 (66.1%) 922 (74.8%) 878 (69.2%)
Yes 74 (16.4%) 95 (26.0%) 93 (28.7%) 164 (31.8%) 144 (31.5%) 131 (33.9%) 311 (25.2%) 390 (30.8%)
Latin America
No 418 (92.5%) 309 (84.4%) 308 (95.1%) 452 (87.8%) 425 (93.0%) 325 (84.0%) 1151 (93.3%) 1086 (85.6%)
Yes 34 (7.5%) 57 (15.6%) 16 (4.9%) 63 (12.2%) 32 (7.0%) 62 (16.0%) 82 (6.7%) 182 (14.4%)
Asia
No 235 (52.0%) 223 (60.9%) 173 (53.4%) 296 (57.5%) 194 (42.5%) 166 (42.9%) 602 (48.8%) 685 (54.0%)
Yes 217 (48.0%) 143 (39.1%) 151 (46.6%) 219 (42.5%) 263 (57.5%) 221 (57.1%) 631 (51.2%) 583 (46.0%)
Europe
No 334 (73.9%) 262 (71.6%) 226 (69.8%) 316 (61.4%) 293 (64.1%) 247 (63.8%) 853 (69.2%) 825 (65.1%)
Yes 118 (26.1%) 104 (28.4%) 98 (30.2%) 199 (38.6%) 164 (35.9%) 140 (36.2%) 380 (30.8%) 443 (34.9%)
Oceania
No 447 (98.9%) 355 (97.0%) 308 (95.1%) 470 (91.3%) 417 (91.2%) 342 (88.4%) 1172 (95.1%) 1167 (92.0%)
Yes 5 (1.1%) 11 (3.0%) 16 (4.9%) 45 (8.7%) 40 (8.8%) 45 (11.6%) 61 (4.9%) 101 (8.0%)
Multicentre
No 277 (61.3%) 168 (45.9%) 215 (66.4%) 247 (48.0%) 248 (54.3%) 212 (54.8%) 740 (60.0%) 627 (49.4%)
Yes 175 (38.7%) 198 (54.1%) 109 (33.6%) 268 (52.0%) 209 (45.7%) 175 (45.2%) 493 (40.0%) 641 (50.6%)
Primary purpose
Other 19 (4.2%) 7 (1.9%) 63 (19.4%) 38 (7.4%) 52 (11.4%) 59 (15.2%) 134 (10.9%) 104 (8.2%)
Prevention 47 (10.4%) 74 (20.2%) 82 (25.3%) 186 (36.1%) 31 (6.8%) 57 (14.7%) 160 (13.0%) 317 (25.0%)
Treatment 386 (85.4%) 285 (77.9%) 179 (55.2%) 291 (56.5%) 374 (81.8%) 271 (70.0%) 939 (76.2%) 847 (66.8%)
Sponsor type
Industry 65 (14.4%) 117 (32.0%) 69 (21.3%) 192 (37.3%) 169 (37.0%) 157 (40.6%) 303 (24.6%) 466 (36.8%)
Investigator 29 (6.4%) 23 (6.3%) 8 (2.5%) 27 (5.2%) 19 (4.2%) 23 (5.9%) 56 (4.5%) 73 (5.8%)
Non industry 350 (77.4%) 223 (60.9%) 242 (74.7%) 294 (57.1%) 232 (50.8%) 201 (51.9%) 824 (66.8%) 718 (56.6%)
Missing 8 (1.8%) 3 (0.8%) 5 (1.5%) 2 (0.4%) 37 (8.1%) 6 (1.6%) 50 (4.1%) 11 (0.9%)
Sample size
Mean (SD) 4.57 (1.36) 5.19 (1.55) 4.36 (1.43) 5.24 (1.49) 4.24 (1.26) 4.83 (1.20) 4.39 (1.35) 5.10 (1.44)
Median [Min, Max] 4.45 [0, 11.8] 4.85 [1.95, 11.0] 4.25 [0, 10.8] 5.06 [0, 11.7] 4.09 [0.693, 11.8] 4.61 [0, 9.90] 4.25 [0, 11.8] 4.86 [0, 11.7]
Missing 2 (0.4%) 0 (0%) 2 (0.6%) 0 (0%) 0 (0%) 0 (0%) 4 (0.3%) 0 (0%)
Vaccine
No 437 (96.7%) 334 (91.3%) 229 (70.7%) 366 (71.1%) 441 (96.5%) 375 (96.9%) 1107 (89.8%) 1075 (84.8%)
Yes 15 (3.3%) 32 (8.7%) 95 (29.3%) 149 (28.9%) 16 (3.5%) 12 (3.1%) 126 (10.2%) 193 (15.2%)
Conventional
No 69 (15.3%) 65 (17.8%) 95 (29.3%) 178 (34.6%) 35 (7.7%) 41 (10.6%) 199 (16.1%) 284 (22.4%)
Yes 383 (84.7%) 301 (82.2%) 229 (70.7%) 337 (65.4%) 422 (92.3%) 346 (89.4%) 1034 (83.9%) 984 (77.6%)
Traditional
No 390 (86.3%) 323 (88.3%) 312 (96.3%) 464 (90.1%) 420 (91.9%) 344 (88.9%) 1122 (91.0%) 1131 (89.2%)
Yes 62 (13.7%) 43 (11.7%) 12 (3.7%) 51 (9.9%) 37 (8.1%) 43 (11.1%) 111 (9.0%) 137 (10.8%)
Subject blind
No 408 (90.3%) 16 (4.4%) 270 (83.3%) 48 (9.3%) 398 (87.1%) 18 (4.7%) 1076 (87.3%) 82 (6.5%)
Yes 1 (0.2%) 214 (58.5%) 0 (0%) 268 (52.0%) 0 (0%) 175 (45.2%) 1 (0.1%) 657 (51.8%)
Missing 43 (9.5%) 136 (37.2%) 54 (16.7%) 199 (38.6%) 59 (12.9%) 194 (50.1%) 156 (12.7%) 529 (41.7%)
Caregiver blind
No 408 (90.3%) 100 (27.3%) 270 (83.3%) 150 (29.1%) 398 (87.1%) 102 (26.4%) 1076 (87.3%) 352 (27.8%)
Yes 1 (0.2%) 130 (35.5%) 0 (0%) 166 (32.2%) 0 (0%) 87 (22.5%) 1 (0.1%) 383 (30.2%)
Missing 43 (9.5%) 136 (37.2%) 54 (16.7%) 199 (38.6%) 59 (12.9%) 198 (51.2%) 156 (12.7%) 533 (42.0%)
Investigator blind
No 408 (90.3%) 52 (14.2%) 270 (83.3%) 60 (11.7%) 398 (87.1%) 60 (15.5%) 1076 (87.3%) 172 (13.6%)
Yes 1 (0.2%) 178 (48.6%) 0 (0%) 256 (49.7%) 0 (0%) 133 (34.4%) 1 (0.1%) 567 (44.7%)
Missing 43 (9.5%) 136 (37.2%) 54 (16.7%) 199 (38.6%) 59 (12.9%) 194 (50.1%) 156 (12.7%) 529 (41.7%)
Outcome blind
No 408 (90.3%) 109 (29.8%) 270 (83.3%) 131 (25.4%) 398 (87.1%) 87 (22.5%) 1076 (87.3%) 327 (25.8%)
Yes 1 (0.2%) 121 (33.1%) 0 (0%) 189 (36.7%) 0 (0%) 103 (26.6%) 1 (0.1%) 413 (32.6%)
Missing 43 (9.5%) 136 (37.2%) 54 (16.7%) 195 (37.9%) 59 (12.9%) 197 (50.9%) 156 (12.7%) 528 (41.6%)
Analyst blind
No 409 (90.5%) 228 (62.3%) 270 (83.3%) 313 (60.8%) 398 (87.1%) 189 (48.8%) 1077 (87.3%) 730 (57.6%)
Yes 0 (0%) 2 (0.5%) 0 (0%) 3 (0.6%) 0 (0%) 0 (0%) 0 (0%) 5 (0.4%)
Missing 43 (9.5%) 136 (37.2%) 54 (16.7%) 199 (38.6%) 59 (12.9%) 198 (51.2%) 156 (12.7%) 533 (42.0%)

7.2.4 Prospective registration

# Characteristics table with columns split by study arm and, within each arm,
# by prospective-registration status. `prospective` is also listed as a row
# variable, so its own rows are 100% / 0% by construction.
table1::table1(
  ~ control_arm + randomisation + blinding + prospective + source_registry +
    phase_clean + region_Africa + region_N_America + region_L_America +
    region_Asia + region_Europe + region_Oceania + multicentre +
    primary_purpose + sponsor_type + sample_size + vaccine + conventional +
    traditional + subject_blind + caregiver_blind + investigator_blind +
    outcome_blind + analyst_blind | study_arm * prospective,
  data = d
)
covid
im
main
Overall
No
(N=259)
Yes
(N=559)
No
(N=281)
Yes
(N=558)
No
(N=222)
Yes
(N=622)
No
(N=762)
Yes
(N=1739)
Control arm
No 38 (14.7%) 67 (12.0%) 37 (13.2%) 62 (11.1%) 58 (26.1%) 154 (24.8%) 133 (17.5%) 283 (16.3%)
Yes 221 (85.3%) 492 (88.0%) 244 (86.8%) 496 (88.9%) 164 (73.9%) 468 (75.2%) 629 (82.5%) 1456 (83.7%)
Randomisation
No 57 (22.0%) 92 (16.5%) 58 (20.6%) 88 (15.8%) 69 (31.1%) 188 (30.2%) 184 (24.1%) 368 (21.2%)
Not applicable 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Yes 202 (78.0%) 467 (83.5%) 223 (79.4%) 470 (84.2%) 153 (68.9%) 434 (69.8%) 578 (75.9%) 1371 (78.8%)
Blinding
No 161 (62.2%) 291 (52.1%) 115 (40.9%) 209 (37.5%) 124 (55.9%) 333 (53.5%) 400 (52.5%) 833 (47.9%)
Yes 98 (37.8%) 268 (47.9%) 166 (59.1%) 349 (62.5%) 98 (44.1%) 289 (46.5%) 362 (47.5%) 906 (52.1%)
Prospective registration
No 259 (100%) 0 (0%) 281 (100%) 0 (0%) 222 (100%) 0 (0%) 762 (100%) 0 (0%)
Yes 0 (0%) 559 (100%) 0 (0%) 558 (100%) 0 (0%) 622 (100%) 0 (0%) 1739 (100%)
Source registry
ChiCTR 33 (12.7%) 24 (4.3%) 12 (4.3%) 30 (5.4%) 9 (4.1%) 51 (8.2%) 54 (7.1%) 105 (6.0%)
CT.gov 113 (43.6%) 304 (54.4%) 153 (54.4%) 302 (54.1%) 112 (50.5%) 240 (38.6%) 378 (49.6%) 846 (48.6%)
CTRI 1 (0.4%) 71 (12.7%) 6 (2.1%) 21 (3.8%) 1 (0.5%) 62 (10.0%) 8 (1.0%) 154 (8.9%)
EUCTR 36 (13.9%) 68 (12.2%) 20 (7.1%) 125 (22.4%) 13 (5.9%) 169 (27.2%) 69 (9.1%) 362 (20.8%)
IRCT 56 (21.6%) 53 (9.5%) 53 (18.9%) 7 (1.3%) 36 (16.2%) 22 (3.5%) 145 (19.0%) 82 (4.7%)
JPRN 1 (0.4%) 9 (1.6%) 20 (7.1%) 25 (4.5%) 31 (14.0%) 36 (5.8%) 52 (6.8%) 70 (4.0%)
Other 19 (7.3%) 30 (5.4%) 17 (6.0%) 48 (8.6%) 20 (9.0%) 42 (6.8%) 56 (7.3%) 120 (6.9%)
Phase
Phase 1 28 (10.8%) 57 (10.2%) 37 (13.2%) 49 (8.8%) 34 (15.3%) 75 (12.1%) 99 (13.0%) 181 (10.4%)
Phase 2 73 (28.2%) 220 (39.4%) 47 (16.7%) 134 (24.0%) 58 (26.1%) 192 (30.9%) 178 (23.4%) 546 (31.4%)
Phase 3 95 (36.7%) 185 (33.1%) 81 (28.8%) 172 (30.8%) 62 (27.9%) 173 (27.8%) 238 (31.2%) 530 (30.5%)
Phase 4 26 (10.0%) 35 (6.3%) 59 (21.0%) 127 (22.8%) 25 (11.3%) 93 (15.0%) 110 (14.4%) 255 (14.7%)
Undefined 36 (13.9%) 57 (10.2%) 48 (17.1%) 63 (11.3%) 34 (15.3%) 79 (12.7%) 118 (15.5%) 199 (11.4%)
Missing 1 (0.4%) 5 (0.9%) 9 (3.2%) 13 (2.3%) 9 (4.1%) 10 (1.6%) 19 (2.5%) 28 (1.6%)
Africa
No 251 (96.9%) 519 (92.8%) 266 (94.7%) 505 (90.5%) 209 (94.1%) 582 (93.6%) 726 (95.3%) 1606 (92.4%)
Yes 8 (3.1%) 40 (7.2%) 15 (5.3%) 53 (9.5%) 13 (5.9%) 40 (6.4%) 36 (4.7%) 133 (7.6%)
North America
No 227 (87.6%) 422 (75.5%) 225 (80.1%) 357 (64.0%) 184 (82.9%) 385 (61.9%) 636 (83.5%) 1164 (66.9%)
Yes 32 (12.4%) 137 (24.5%) 56 (19.9%) 201 (36.0%) 38 (17.1%) 237 (38.1%) 126 (16.5%) 575 (33.1%)
Latin America
No 226 (87.3%) 501 (89.6%) 263 (93.6%) 497 (89.1%) 207 (93.2%) 543 (87.3%) 696 (91.3%) 1541 (88.6%)
Yes 33 (12.7%) 58 (10.4%) 18 (6.4%) 61 (10.9%) 15 (6.8%) 79 (12.7%) 66 (8.7%) 198 (11.4%)
Asia
No 130 (50.2%) 328 (58.7%) 131 (46.6%) 338 (60.6%) 87 (39.2%) 273 (43.9%) 348 (45.7%) 939 (54.0%)
Yes 129 (49.8%) 231 (41.3%) 150 (53.4%) 220 (39.4%) 135 (60.8%) 349 (56.1%) 414 (54.3%) 800 (46.0%)
Europe
No 189 (73.0%) 407 (72.8%) 220 (78.3%) 322 (57.7%) 179 (80.6%) 361 (58.0%) 588 (77.2%) 1090 (62.7%)
Yes 70 (27.0%) 152 (27.2%) 61 (21.7%) 236 (42.3%) 43 (19.4%) 261 (42.0%) 174 (22.8%) 649 (37.3%)
Oceania
No 259 (100%) 543 (97.1%) 272 (96.8%) 506 (90.7%) 220 (99.1%) 539 (86.7%) 751 (98.6%) 1588 (91.3%)
Yes 0 (0%) 16 (2.9%) 9 (3.2%) 52 (9.3%) 2 (0.9%) 83 (13.3%) 11 (1.4%) 151 (8.7%)
Multicentre
No 166 (64.1%) 279 (49.9%) 202 (71.9%) 260 (46.6%) 162 (73.0%) 298 (47.9%) 530 (69.6%) 837 (48.1%)
Yes 93 (35.9%) 280 (50.1%) 79 (28.1%) 298 (53.4%) 60 (27.0%) 324 (52.1%) 232 (30.4%) 902 (51.9%)
Primary purpose
Other 7 (2.7%) 19 (3.4%) 41 (14.6%) 60 (10.8%) 32 (14.4%) 79 (12.7%) 80 (10.5%) 158 (9.1%)
Prevention 36 (13.9%) 85 (15.2%) 103 (36.7%) 165 (29.6%) 23 (10.4%) 65 (10.5%) 162 (21.3%) 315 (18.1%)
Treatment 216 (83.4%) 455 (81.4%) 137 (48.8%) 333 (59.7%) 167 (75.2%) 478 (76.8%) 520 (68.2%) 1266 (72.8%)
Sponsor type
Industry 39 (15.1%) 143 (25.6%) 77 (27.4%) 184 (33.0%) 53 (23.9%) 273 (43.9%) 169 (22.2%) 600 (34.5%)
Investigator 8 (3.1%) 44 (7.9%) 11 (3.9%) 24 (4.3%) 7 (3.2%) 35 (5.6%) 26 (3.4%) 103 (5.9%)
Non industry 211 (81.5%) 362 (64.8%) 189 (67.3%) 347 (62.2%) 138 (62.2%) 295 (47.4%) 538 (70.6%) 1004 (57.7%)
Missing 1 (0.4%) 10 (1.8%) 4 (1.4%) 3 (0.5%) 24 (10.8%) 19 (3.1%) 29 (3.8%) 32 (1.8%)
Sample size
Mean (SD) 4.66 (1.31) 4.93 (1.54) 4.55 (1.37) 5.08 (1.58) 4.30 (1.25) 4.58 (1.26) 4.52 (1.32) 4.85 (1.48)
Median [Min, Max] 4.61 [0, 10.6] 4.61 [1.79, 11.8] 4.48 [0, 9.10] 4.99 [0, 11.7] 4.19 [1.10, 11.8] 4.50 [0, 9.39] 4.41 [0, 11.8] 4.61 [0, 11.8]
Missing 1 (0.4%) 1 (0.2%) 2 (0.7%) 0 (0%) 0 (0%) 0 (0%) 3 (0.4%) 1 (0.1%)
Vaccine
No 251 (96.9%) 520 (93.0%) 187 (66.5%) 408 (73.1%) 215 (96.8%) 601 (96.6%) 653 (85.7%) 1529 (87.9%)
Yes 8 (3.1%) 39 (7.0%) 94 (33.5%) 150 (26.9%) 7 (3.2%) 21 (3.4%) 109 (14.3%) 210 (12.1%)
Conventional
No 33 (12.7%) 101 (18.1%) 102 (36.3%) 171 (30.6%) 22 (9.9%) 54 (8.7%) 157 (20.6%) 326 (18.7%)
Yes 226 (87.3%) 458 (81.9%) 179 (63.7%) 387 (69.4%) 200 (90.1%) 568 (91.3%) 605 (79.4%) 1413 (81.3%)
Traditional
No 226 (87.3%) 487 (87.1%) 262 (93.2%) 514 (92.1%) 195 (87.8%) 569 (91.5%) 683 (89.6%) 1570 (90.3%)
Yes 33 (12.7%) 72 (12.9%) 19 (6.8%) 44 (7.9%) 27 (12.2%) 53 (8.5%) 79 (10.4%) 169 (9.7%)
Subject blind
No 139 (53.7%) 285 (51.0%) 122 (43.4%) 196 (35.1%) 120 (54.1%) 296 (47.6%) 381 (50.0%) 777 (44.7%)
Yes 46 (17.8%) 169 (30.2%) 73 (26.0%) 195 (34.9%) 45 (20.3%) 130 (20.9%) 164 (21.5%) 494 (28.4%)
Missing 74 (28.6%) 105 (18.8%) 86 (30.6%) 167 (29.9%) 57 (25.7%) 196 (31.5%) 217 (28.5%) 468 (26.9%)
Caregiver blind
No 151 (58.3%) 357 (63.9%) 155 (55.2%) 265 (47.5%) 138 (62.2%) 362 (58.2%) 444 (58.3%) 984 (56.6%)
Yes 34 (13.1%) 97 (17.4%) 40 (14.2%) 126 (22.6%) 25 (11.3%) 62 (10.0%) 99 (13.0%) 285 (16.4%)
Missing 74 (28.6%) 105 (18.8%) 86 (30.6%) 167 (29.9%) 59 (26.6%) 198 (31.8%) 219 (28.7%) 470 (27.0%)
Investigator blind
No 151 (58.3%) 309 (55.3%) 127 (45.2%) 203 (36.4%) 128 (57.7%) 330 (53.1%) 406 (53.3%) 842 (48.4%)
Yes 34 (13.1%) 145 (25.9%) 68 (24.2%) 188 (33.7%) 37 (16.7%) 96 (15.4%) 139 (18.2%) 429 (24.7%)
Missing 74 (28.6%) 105 (18.8%) 86 (30.6%) 167 (29.9%) 57 (25.7%) 196 (31.5%) 217 (28.5%) 468 (26.9%)
Outcome blind
No 160 (61.8%) 357 (63.9%) 142 (50.5%) 259 (46.4%) 135 (60.8%) 350 (56.3%) 437 (57.3%) 966 (55.5%)
Yes 25 (9.7%) 97 (17.4%) 54 (19.2%) 135 (24.2%) 29 (13.1%) 74 (11.9%) 108 (14.2%) 306 (17.6%)
Missing 74 (28.6%) 105 (18.8%) 85 (30.2%) 164 (29.4%) 58 (26.1%) 198 (31.8%) 217 (28.5%) 467 (26.9%)
Analyst blind
No 185 (71.4%) 452 (80.9%) 195 (69.4%) 388 (69.5%) 163 (73.4%) 424 (68.2%) 543 (71.3%) 1264 (72.7%)
Yes 0 (0%) 2 (0.4%) 0 (0%) 3 (0.5%) 0 (0%) 0 (0%) 0 (0%) 5 (0.3%)
Missing 74 (28.6%) 105 (18.8%) 86 (30.6%) 167 (29.9%) 59 (26.6%) 198 (31.8%) 219 (28.7%) 470 (27.0%)

7.3 Outcomes over time (analysis 3)

7.4 Covid trial characteristics over time

Results number 3. Trials with missing start date (23) are dropped.

# Keep only the trials in the covid arm
d_cov <- filter(d, study_arm == "covid")

# sum(is.na(d_cov$start_date))

# For every start month, compute the share of trials answering "Yes" on each
# of the four design features, then stack the four shares into long format
# (one row per month and feature). Rows with a missing month (trials without
# a start date) or a missing share are removed.
d_sum <- d_cov %>%
  mutate(month = lubridate::floor_date(start_date, "month")) %>%
  group_by(month) %>%
  summarize(
    Controlled = sum(control_arm == "Yes") / n(),
    Randomised = sum(randomisation == "Yes") / n(),
    Blinded = sum(blinding == "Yes") / n(),
    Prospective = sum(prospective == "Yes") / n()
  ) %>%
  gather(key = "Variable", value = "value", -month) %>%
  filter(!is.na(month), !is.na(value))
## `summarise()` ungrouping output (override with `.groups` argument)
# d_sum <- d_sum %>% 
#   filter(month > "2019-01-01" & month < "2020-01-01")

# Monthly proportions as a line chart: one line per design feature,
# distinguished by both colour and line type; y axis fixed to [0, 1].
ggplot(
  d_sum,
  aes(x = month, y = value, color = Variable, linetype = Variable)
) +
  geom_line() +
  labs(x = "Start date (grouped by month)", y = "Proportion") +
  ylim(0, 1) +
  theme_classic() +
  theme(legend.title = element_blank())

7.5 Direct effect (analyses 4 and 5)

7.5.1 Complete case analysis

For each outcome we fit a logistic regression model with adjustment for all covariates, not including the other outcomes.

# Outcomes modelled in the direct-effect analyses
myvars <- c("control_arm", "randomisation", "blinding", "prospective")

# Adjustment set for the main dataset: covid plus every column from
# source_registry through traditional (in dataset column order).
main_direct_adjustment <- names(
  select(main_dataset, covid, source_registry:traditional)
)

# Adjustment set for the indication-matched models. It started out identical
# to the main set, but convergence problems meant the intervention variables
# had to be left out.
indication_direct_adjustment <- setdiff(
  main_direct_adjustment,
  c("vaccine", "conventional", "traditional")
)

# Fit one logistic regression per outcome on the main dataset; the formula is
# "<outcome> ~ <all main adjustment variables>".
main_direct_models <- lapply(myvars, function(outcome) {
  glm(
    as.formula(
      paste(outcome, "~", paste(main_direct_adjustment, collapse = "+"))
    ),
    family = binomial(link = "logit"),
    data = main_dataset
  )
})

We are adjusting for covid, source_registry, phase_clean, region_Africa, region_N_America, region_L_America, region_Asia, region_Europe, region_Oceania, multicentre, primary_purpose, sponsor_type, sample_size, vaccine, conventional, traditional. Adjusting for the full source_registry variable led to convergence problems, so its infrequent categories were grouped into "Other" rather than dropping the variable. For the indication-matched dataset, the intervention variables (vaccine, conventional, traditional) are excluded because they caused convergence problems.

7.5.1.1 Main dataset

# Tabulate the four main-dataset models side by side, one column group per
# outcome (rendered below as odds ratios with CI and p-value).
sjPlot::tab_model(main_direct_models)
  Control arm randomisation Blinding Prospective registration
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.71 0.10 – 4.43 0.715 0.38 0.08 – 1.68 0.207 0.37 0.13 – 1.05 0.063 2.13 0.68 – 6.67 0.194
covidTRUE 1.91 1.38 – 2.65 <0.001 1.69 1.26 – 2.27 <0.001 0.73 0.57 – 0.92 0.008 0.81 0.62 – 1.05 0.106
Source registry: CT.gov 0.24 0.09 – 0.57 0.003 0.44 0.21 – 0.87 0.024 1.60 0.96 – 2.69 0.071 0.80 0.48 – 1.31 0.374
Source registry: CTRI 0.21 0.07 – 0.55 0.003 0.37 0.16 – 0.81 0.016 1.04 0.58 – 1.89 0.884 32.16 9.22 – 203.94 <0.001
Source registry: EUCTR 0.09 0.03 – 0.26 <0.001 0.29 0.12 – 0.67 0.004 1.32 0.70 – 2.50 0.390 0.91 0.46 – 1.78 0.777
Source registry: IRCT 0.54 0.17 – 1.55 0.269 0.59 0.26 – 1.32 0.210 3.16 1.79 – 5.63 <0.001 0.52 0.30 – 0.91 0.023
Source registry: JPRN 0.28 0.06 – 1.34 0.096 0.45 0.12 – 1.78 0.233 2.52 0.86 – 7.74 0.095 1.24 0.39 – 4.82 0.731
Source registry: Other 0.22 0.07 – 0.62 0.006 0.49 0.20 – 1.16 0.108 1.44 0.77 – 2.73 0.256 0.58 0.31 – 1.11 0.100
Phase: Phase 2 1.03 0.64 – 1.64 0.905 1.03 0.67 – 1.59 0.878 1.15 0.78 – 1.70 0.477 1.35 0.89 – 2.06 0.159
Phase: Phase 3 2.34 1.29 – 4.28 0.005 2.46 1.44 – 4.22 0.001 1.29 0.84 – 1.98 0.249 0.94 0.59 – 1.50 0.806
Phase: Phase 4 1.48 0.78 – 2.83 0.234 1.97 1.10 – 3.62 0.025 0.92 0.57 – 1.48 0.739 1.09 0.66 – 1.81 0.737
Phase: Undefined 0.98 0.55 – 1.76 0.959 1.05 0.63 – 1.78 0.845 1.53 0.97 – 2.39 0.065 0.95 0.59 – 1.54 0.845
Africa: Yes 1.69 0.80 – 4.01 0.197 1.18 0.63 – 2.39 0.619 1.03 0.66 – 1.61 0.901 1.79 1.03 – 3.25 0.048
North America: Yes 0.55 0.37 – 0.83 0.004 0.43 0.29 – 0.63 <0.001 0.80 0.59 – 1.08 0.150 2.02 1.39 – 3.00 <0.001
Latin America: Yes 1.11 0.65 – 1.98 0.702 1.48 0.88 – 2.57 0.152 1.88 1.31 – 2.73 0.001 0.76 0.49 – 1.19 0.218
Asia: Yes 0.62 0.41 – 0.93 0.020 0.63 0.43 – 0.93 0.020 0.69 0.51 – 0.94 0.018 1.08 0.74 – 1.61 0.709
Europe: Yes 0.99 0.61 – 1.64 0.970 0.69 0.44 – 1.09 0.113 0.67 0.47 – 0.95 0.025 1.27 0.83 – 1.98 0.280
Oceania: Yes 0.83 0.44 – 1.63 0.586 0.73 0.40 – 1.37 0.324 0.99 0.61 – 1.62 0.975 16.37 4.87 – 102.36 <0.001
Multicentre: Yes 0.75 0.52 – 1.10 0.139 0.85 0.60 – 1.19 0.338 0.99 0.75 – 1.29 0.918 1.48 1.10 – 1.98 0.009
Primary purpose:
Prevention
0.78 0.35 – 1.75 0.534 0.65 0.33 – 1.28 0.209 1.48 0.88 – 2.49 0.140 0.80 0.44 – 1.43 0.446
Primary purpose:
Treatment
0.48 0.26 – 0.84 0.013 0.57 0.34 – 0.94 0.033 0.73 0.48 – 1.10 0.137 0.89 0.55 – 1.40 0.609
Sponsor type:
Investigator
0.65 0.34 – 1.26 0.196 0.73 0.40 – 1.36 0.311 0.87 0.53 – 1.44 0.592 1.35 0.71 – 2.68 0.378
Sponsor type: Non
industry
0.70 0.47 – 1.03 0.073 0.67 0.47 – 0.96 0.032 0.54 0.40 – 0.73 <0.001 0.84 0.60 – 1.16 0.291
Sample size 2.32 1.97 – 2.74 <0.001 2.29 1.97 – 2.67 <0.001 1.37 1.24 – 1.52 <0.001 1.09 0.97 – 1.21 0.142
Vaccine: Yes 0.67 0.18 – 2.90 0.570 0.35 0.11 – 1.17 0.081 0.77 0.33 – 1.80 0.553 1.38 0.53 – 3.86 0.523
Conventional: Yes 1.77 0.51 – 7.65 0.402 1.25 0.45 – 3.88 0.676 0.80 0.40 – 1.58 0.515 0.71 0.33 – 1.52 0.373
Traditional: Yes 3.73 1.14 – 16.25 0.049 2.37 0.93 – 7.13 0.093 1.18 0.62 – 2.24 0.601 0.72 0.37 – 1.43 0.347
Observations 1588 1588 1588 1588
R2 Tjur 0.215 0.228 0.103 0.142
# Plot the estimates of all main-dataset models together, with a horizontal
# reference line at 1.
sjPlot::plot_models(main_direct_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

7.5.1.2 Indication-matched dataset

We repeat the same analyses on the indication-matched dataset.

# Fit one logistic regression per outcome on the indication-matched dataset,
# using the reduced adjustment set, and name each model
# "<outcome>_(direct_indication)".
indication_direct_models <- setNames(
  lapply(myvars, function(outcome) {
    glm(
      as.formula(
        paste(outcome, "~", paste(indication_direct_adjustment, collapse = "+"))
      ),
      family = binomial(link = "logit"),
      data = indication_dataset
    )
  }),
  paste0(myvars, "_(direct_indication)")
)
# lapply(indication_direct_models, summary)

# Tabulate the four indication-matched models side by side
sjPlot::tab_model(indication_direct_models)
  Control arm randomisation Blinding Prospective registration
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.67 0.18 – 2.68 0.559 0.31 0.10 – 0.94 0.038 0.21 0.09 – 0.49 <0.001 0.46 0.20 – 1.06 0.067
covidTRUE 0.46 0.30 – 0.68 <0.001 0.49 0.35 – 0.68 <0.001 0.38 0.30 – 0.49 <0.001 1.02 0.79 – 1.31 0.894
Source registry: CT.gov 0.27 0.09 – 0.68 0.008 0.53 0.26 – 1.07 0.082 2.22 1.28 – 3.94 0.005 1.17 0.70 – 1.95 0.544
Source registry: CTRI 0.38 0.12 – 1.12 0.086 0.79 0.33 – 1.89 0.590 1.05 0.53 – 2.09 0.879 9.80 4.19 – 25.99 <0.001
Source registry: EUCTR 0.11 0.03 – 0.32 <0.001 0.37 0.15 – 0.89 0.029 1.30 0.66 – 2.58 0.452 1.34 0.69 – 2.60 0.381
Source registry: IRCT 0.56 0.18 – 1.67 0.312 0.74 0.33 – 1.63 0.460 3.02 1.65 – 5.62 <0.001 0.44 0.25 – 0.76 0.004
Source registry: JPRN 0.05 0.01 – 0.18 <0.001 0.12 0.04 – 0.34 <0.001 1.37 0.54 – 3.53 0.516 0.85 0.35 – 2.07 0.715
Source registry: Other 0.27 0.08 – 0.89 0.033 0.66 0.25 – 1.79 0.408 1.04 0.51 – 2.15 0.911 0.95 0.47 – 1.90 0.877
Phase: Phase 2 2.40 1.40 – 4.12 0.001 2.25 1.40 – 3.60 0.001 1.63 1.09 – 2.46 0.019 1.36 0.90 – 2.07 0.147
Phase: Phase 3 2.37 1.27 – 4.48 0.007 2.29 1.33 – 3.96 0.003 1.13 0.73 – 1.75 0.597 1.08 0.69 – 1.69 0.748
Phase: Phase 4 1.17 0.64 – 2.15 0.608 1.08 0.64 – 1.82 0.777 0.63 0.40 – 1.00 0.048 0.94 0.59 – 1.48 0.779
Phase: Undefined 0.92 0.50 – 1.72 0.804 0.97 0.56 – 1.67 0.913 0.94 0.58 – 1.52 0.808 0.83 0.51 – 1.34 0.448
Africa: Yes 1.85 0.77 – 5.54 0.213 2.22 0.99 – 5.96 0.075 1.42 0.91 – 2.27 0.132 1.70 1.03 – 2.92 0.045
North America: Yes 0.80 0.49 – 1.33 0.384 0.58 0.38 – 0.92 0.018 0.78 0.57 – 1.07 0.119 1.90 1.35 – 2.72 <0.001
Latin America: Yes 1.06 0.55 – 2.18 0.863 1.13 0.63 – 2.14 0.700 1.74 1.17 – 2.62 0.007 0.71 0.47 – 1.08 0.100
Asia: Yes 0.86 0.51 – 1.48 0.580 0.87 0.55 – 1.40 0.554 0.88 0.63 – 1.22 0.429 0.99 0.70 – 1.42 0.977
Europe: Yes 0.93 0.53 – 1.66 0.800 0.79 0.48 – 1.30 0.341 0.84 0.59 – 1.18 0.299 1.22 0.84 – 1.78 0.301
Oceania: Yes 1.25 0.49 – 3.74 0.658 1.22 0.52 – 3.16 0.664 1.70 0.93 – 3.23 0.095 3.63 1.72 – 8.53 0.001
Multicentre: Yes 1.17 0.77 – 1.80 0.470 1.41 0.99 – 2.04 0.060 1.25 0.97 – 1.62 0.090 1.57 1.20 – 2.06 0.001
Primary purpose:
Prevention
1.33 0.71 – 2.49 0.368 1.08 0.63 – 1.85 0.767 1.67 1.04 – 2.72 0.036 0.75 0.46 – 1.22 0.248
Primary purpose:
Treatment
2.36 1.35 – 4.06 0.002 2.63 1.62 – 4.24 <0.001 1.47 0.95 – 2.28 0.085 1.19 0.77 – 1.84 0.437
Sponsor type:
Investigator
1.06 0.48 – 2.44 0.896 1.00 0.49 – 2.11 0.994 0.98 0.57 – 1.69 0.944 1.32 0.73 – 2.45 0.369
Sponsor type: Non
industry
1.17 0.74 – 1.82 0.503 0.96 0.64 – 1.42 0.837 0.56 0.41 – 0.75 <0.001 1.12 0.82 – 1.53 0.469
Sample size 1.99 1.69 – 2.37 <0.001 1.85 1.61 – 2.14 <0.001 1.36 1.24 – 1.50 <0.001 1.16 1.06 – 1.28 0.002
Observations 1610 1610 1610 1610
R2 Tjur 0.148 0.172 0.163 0.136
# Plot the estimates of all indication-matched models together, with a
# horizontal reference line at 1.
sjPlot::plot_models(indication_direct_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1) # prefix.labels = "label"

7.5.2 Multiple imputation

Phase, sample size (for 3 trials) and sponsor type have missing values.

7.5.2.1 Main dataset

# Derive start_year as the number formed by the first four characters of
# start_date
main_dataset <- main_dataset %>%
  mutate(start_year = as.numeric(substr(start_date, 1, 4)))

# Imputation input: everything except the trial identifier, the raw start
# date and the study-arm label
main_for_mice <- select(main_dataset, -TrialID, -start_date, -study_arm)

# Show the class of every column that goes into the imputation
map(main_for_mice, class)
## $control_arm
## [1] "factor"
## 
## $randomisation
## [1] "factor"
## 
## $blinding
## [1] "factor"
## 
## $prospective
## [1] "factor"
## 
## $source_registry
## [1] "factor"
## 
## $phase_clean
## [1] "factor"
## 
## $region_Africa
## [1] "factor"
## 
## $region_N_America
## [1] "factor"
## 
## $region_L_America
## [1] "factor"
## 
## $region_Asia
## [1] "factor"
## 
## $region_Europe
## [1] "factor"
## 
## $region_Oceania
## [1] "factor"
## 
## $multicentre
## [1] "factor"
## 
## $primary_purpose
## [1] "factor"
## 
## $sponsor_type
## [1] "factor"
## 
## $sample_size
## [1] "numeric"
## 
## $vaccine
## [1] "factor"
## 
## $conventional
## [1] "factor"
## 
## $traditional
## [1] "factor"
## 
## $subject_blind
## [1] "factor"
## 
## $caregiver_blind
## [1] "factor"
## 
## $investigator_blind
## [1] "factor"
## 
## $outcome_blind
## [1] "factor"
## 
## $analyst_blind
## [1] "factor"
## 
## $covid
## [1] "logical"
## 
## $start_year
## [1] "numeric"

Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.

# Fix the RNG state so the imputations that follow are reproducible
set.seed(5)

# Dry run (maxit = 0): sets up the imputation model without imputing, so the
# default predictor matrix can be extracted and edited. Argument and element
# names are spelled out in full instead of relying on partial matching
# (`print` -> `printFlag`, `$pred` -> `$predictorMatrix`).
mice_in <- mice::mice(main_for_mice, maxit = 0, printFlag = FALSE)
predictor_matrix <- mice_in$predictorMatrix

# Pairwise correlations (rounded to 2 d.p.) between the outcomes and the main
# adjustment variables. Every column is passed through as.numeric(), so
# factors enter as their integer level codes. vapply() pins the result to one
# numeric column of nrow(main_for_mice) values per variable.
correlation_matrix <- round(
  cor(
    vapply(
      main_for_mice[, c(myvars, main_direct_adjustment)],
      as.numeric,
      numeric(nrow(main_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)

# Blank out the lower triangle so that only the upper triangle is kept
correlation_matrix[lower.tri(correlation_matrix)] <- NA

# Long format: one row per (Var1, Var2) cell of the matrix
melted_correlation_matrix <- reshape2::melt(correlation_matrix)

# Reverse the level order of Var2 (as an ordered factor) so the second axis
# runs in the opposite direction to the first
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)

# Axis labels: the variable names with underscores removed, named by the
# original variable names
labels_plot <- levels(melted_correlation_matrix[, 1])
labels_plot <- setNames(gsub("_", "", labels_plot), labels_plot)

# Heatmap of the correlation matrix. Fill runs from purple (-1) through white
# (0) to green (+1); blanked (NA) cells are drawn white. The y labels are the
# x labels in reverse order. `limits` is spelled out in full rather than
# relying on partial matching of `limit`.
ggplot(
  data = melted_correlation_matrix,
  aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4",
    high = "green3",
    mid = "white",
    midpoint = 0,
    na.value = "white",
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    )
  )

# quickpred: quick selection procedure of predictors.
# Select predictors according to data relations with a minimum correlation of
# ρ = 0.25, then print the resulting predictor matrix. Argument and element
# names are spelled out in full instead of relying on partial matching
# (`pred` -> `predictorMatrix`, `print` -> `printFlag`).
# NOTE(review): no `maxit = 0` is passed here, so this call runs mice with its
# default settings just to display the matrix — confirm that is intended.
mice_in2 <- mice::mice(
  main_for_mice,
  predictorMatrix = mice::quickpred(main_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix

Modify the predictor matrix such that only phase, sample size and sponsor type are imputed, using all variables.

# Use covid as a predictor for every variable. Its own row needs no special
# handling: it is zeroed together with all other non-target rows below.
predictor_matrix[, "covid"] <- 1

# Zero every row except those of phase_clean, sample_size and sponsor_type,
# so only these three variables keep any predictors.
# NOTE(review): the printed imputations further down still contain values for
# the *_blind variables and start_year, so zeroing a row does not by itself
# appear to stop a variable from being imputed — confirm whether those
# variables should be switched off explicitly.
predictor_matrix[
  !(rownames(predictor_matrix) %in%
    c("phase_clean", "sample_size", "sponsor_type")),
] <- 0

Generate 10 imputed datasets using chained equations (using package mice).

# Run the chained-equations imputation: 10 imputed datasets, using the edited
# predictor matrix. Argument names are spelled out in full instead of relying
# on partial matching (`pred` -> `predictorMatrix`, `print` -> `printFlag`).
main_mice <- mice::mice(
  main_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# List the component names and the class of the object returned by mice
attributes(main_mice)
## $names
##  [1] "data"            "imp"             "m"               "where"          
##  [5] "blocks"          "call"            "nmis"            "method"         
##  [9] "predictorMatrix" "visitSequence"   "formulas"        "post"           
## [13] "blots"           "ignore"          "seed"            "iteration"      
## [17] "lastSeedValue"   "chainMean"       "chainVar"        "loggedEvents"   
## [21] "version"         "date"           
## 
## $class
## [1] "mids"

Original data:

# First rows of the data as supplied to mice (before imputation)
head(main_mice$data)

Imputed datasets:

# First rows of the imputed values for each variable (one column per
# imputed dataset)
map(main_mice$imp, head)
## $control_arm
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $randomisation
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $blinding
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $prospective
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $source_registry
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $phase_clean
##           1       2         3       4       5         6       7       8       9
## 758 Phase 2 Phase 2   Phase 2 Phase 3 Phase 2   Phase 2 Phase 2 Phase 2 Phase 2
## 775 Phase 2 Phase 3   Phase 3 Phase 4 Phase 3   Phase 2 Phase 2 Phase 3 Phase 2
## 790 Phase 2 Phase 4 Undefined Phase 1 Phase 1   Phase 4 Phase 1 Phase 4 Phase 1
## 800 Phase 3 Phase 3   Phase 3 Phase 2 Phase 4   Phase 2 Phase 2 Phase 4 Phase 2
## 802 Phase 4 Phase 2   Phase 2 Phase 3 Phase 3 Undefined Phase 3 Phase 4 Phase 4
## 809 Phase 3 Phase 2   Phase 3 Phase 3 Phase 2   Phase 2 Phase 3 Phase 3 Phase 2
##          10
## 758 Phase 3
## 775 Phase 3
## 790 Phase 2
## 800 Phase 2
## 802 Phase 3
## 809 Phase 2
## 
## $region_Africa
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_N_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_L_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Asia
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Europe
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Oceania
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $multicentre
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $primary_purpose
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $sponsor_type
##                1            2            3            4            5
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Non industry Non industry     Industry Investigator
## 227 Investigator     Industry     Industry Non industry Non industry
## 272 Non industry Non industry Non industry Non industry Non industry
## 765 Non industry     Industry Non industry Non industry Non industry
## 766 Non industry Non industry Non industry     Industry Non industry
##                6            7            8            9           10
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Investigator Investigator Non industry Non industry
## 227 Non industry     Industry Investigator Non industry Non industry
## 272     Industry Investigator Investigator Non industry Non industry
## 765 Non industry Non industry Non industry Non industry     Industry
## 766 Non industry Non industry Non industry Non industry Non industry
## 
## $sample_size
##             1        2        3        4        5        6        7        8
## 1646 4.094345 6.461468 3.912023 5.298317 2.995732 4.094345 5.023881 5.010635
## 1648 4.382027 4.787492 4.060443 4.094345 5.298317 5.075174 7.600902 5.298317
##             9       10
## 1646 4.317488 3.912023
## 1648 5.010635 6.907755
## 
## $vaccine
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $conventional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $traditional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $subject_blind
##      1  2   3   4   5   6   7   8   9 10
## 1   No No  No  No Yes  No  No  No Yes No
## 2  Yes No  No  No  No Yes Yes  No  No No
## 3  Yes No Yes  No Yes  No  No Yes  No No
## 6   No No Yes  No  No Yes  No  No  No No
## 8  Yes No  No Yes  No Yes Yes Yes  No No
## 10 Yes No  No Yes  No  No Yes  No  No No
## 
## $caregiver_blind
##     1  2  3   4   5   6   7   8  9  10
## 1  No No No  No  No  No  No  No No  No
## 2  No No No Yes  No Yes  No  No No Yes
## 3  No No No  No  No  No  No  No No  No
## 6  No No No  No Yes  No Yes  No No  No
## 8  No No No  No  No  No Yes Yes No  No
## 10 No No No  No  No  No  No  No No  No
## 
## $investigator_blind
##      1   2   3   4   5  6   7   8   9  10
## 1   No  No  No Yes  No No  No  No  No  No
## 2   No Yes  No  No  No No  No Yes Yes  No
## 3   No  No  No  No Yes No  No Yes  No  No
## 6   No Yes Yes  No  No No  No  No Yes  No
## 8  Yes Yes  No Yes  No No Yes Yes  No  No
## 10  No Yes Yes  No  No No Yes  No Yes Yes
## 
## $outcome_blind
##      1   2  3   4  5   6   7   8   9  10
## 1  Yes Yes No  No No  No  No Yes Yes  No
## 2   No  No No  No No  No  No  No  No  No
## 3   No  No No Yes No Yes Yes  No  No  No
## 6   No  No No  No No Yes  No  No  No Yes
## 8   No  No No  No No  No  No  No  No Yes
## 10  No  No No  No No  No  No  No  No  No
## 
## $analyst_blind
##     1  2  3  4  5  6  7  8  9 10
## 1  No No No No No No No No No No
## 2  No No No No No No No No No No
## 3  No No No No No No No No No No
## 6  No No No No No No No No No No
## 8  No No No No No No No No No No
## 10 No No No No No No No No No No
## 
## $covid
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $start_year
##       1    2    3    4    5    6    7    8    9   10
## 32 2018 2019 2020 2020 2019 2019 2019 2020 2019 2019
## 34 2018 2020 2020 2018 2020 2020 2019 2020 2019 2019
## 35 2020 2020 2020 2020 2019 2019 2019 2020 2020 2019
## 48 2020 2019 2020 2020 2018 2020 2020 2020 2019 2019
## 60 2020 2019 2020 2020 2020 2019 2019 2020 2019 2019
## 66 2020 2020 2019 2018 2019 2019 2020 2020 2019 2019
# In order to get the third imputed data set, use the complete() function

Only impute the three variables listed above.

# Start from the imputation methods stored in the earlier mice run, then
# switch imputation off (method "") for every variable except the three
# targets. Variables are selected by name rather than by column position so
# the restriction stays correct if the column order of the data changes.
method_vector <- main_mice$method
method_vector[!(names(method_vector) %in% c("phase_clean", "sample_size", "sponsor_type"))] <- ""

Generate 10 imputed datasets using the updated method vector.

# Re-run the imputation with the restricted method vector, producing
# m = 10 imputed datasets. Argument names are written out in full
# (predictorMatrix, printFlag) instead of relying on partial matching.
main_mice <- mice::mice(
  main_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# Trace plot of the imputation chains, used for the convergence check below
plot(main_mice)

Check that there is no trend with further iterations and that the lines mix.

# Continue the existing chains for 40 further iterations and re-plot the
# traces. printFlag is spelled out rather than abbreviated to `print`.
main_mice_40 <- mice::mice.mids(main_mice, maxit = 40, printFlag = FALSE)
plot(main_mice_40)

Plot of observed (blue) and imputed (red) phase:

# Strip plot of phase_clean split by imputation number (.imp), drawn with
# filled points (pch = 20) at double size (cex = 2).
mice::stripplot(main_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Interpretation aid: under MCAR the univariate distributions of observed and
# imputed data are expected to be identical. Under MAR they may differ in both
# location and spread, but their multivariate distribution is assumed to be
# identical.
# Default strip plot of the mids object (no formula supplied).
mice::stripplot(main_mice)

7.5.2.1.1 Analysis

Logistic regression analysis on the multiply imputed data.

# For every outcome in myvars: fit the adjusted logistic regression on each
# imputed dataset in main_mice and pool the per-dataset fits.
# Each list element holds the unpooled analyses ("fit") and the pooled result
# ("pooled_fit").
main_direct_models_mice <- lapply(myvars, function(outcome_var) {
  # The formula is built inside with() so that it is evaluated against each
  # completed dataset in turn.
  imputed_fits <- with(
    main_mice,
    glm(
      as.formula(paste(outcome_var, "~", paste(main_direct_adjustment, collapse = "+"))),
      family = binomial(link = "logit")
    )
  )
  list("fit" = imputed_fits, "pooled_fit" = mice::pool(imputed_fits))
})

# Label each list element with its outcome, then print the pooled summaries
names(main_direct_models_mice) <- myvars
lapply(main_direct_models_mice, function(model) summary(model[["pooled_fit"]]))
## $control_arm
##                         term     estimate  std.error   statistic        df
## 1                (Intercept) -0.323572308 0.92959121 -0.34808022 1598.4373
## 2                  covidTRUE  0.618259500 0.16237967  3.80749322 1597.0008
## 3      source_registryCT.gov -1.411159424 0.47728743 -2.95662389 1601.4857
## 4        source_registryCTRI -1.533922309 0.52344811 -2.93041906 1598.0291
## 5       source_registryEUCTR -2.350422124 0.54254335 -4.33222919 1601.3571
## 6        source_registryIRCT -0.596825348 0.55017172 -1.08479829 1598.7801
## 7        source_registryJPRN -2.075894604 0.57411290 -3.61582993 1596.2357
## 8       source_registryOther -1.612302584 0.53839782 -2.99463061 1601.5954
## 9         phase_cleanPhase 2  0.003202403 0.23500209  0.01362713 1587.2581
## 10        phase_cleanPhase 3  0.846821180 0.30287209  2.79596968 1403.0430
## 11        phase_cleanPhase 4  0.481981354 0.32549989  1.48074197 1519.0704
## 12      phase_cleanUndefined -0.054094868 0.29130187 -0.18570038 1591.3714
## 13          region_AfricaYes  0.569047966 0.40422593  1.40774730 1601.4203
## 14       region_N_AmericaYes -0.566042714 0.20440011 -2.76928766 1589.4581
## 15       region_L_AmericaYes  0.026915363 0.27384848  0.09828560 1596.2130
## 16            region_AsiaYes -0.456541548 0.20618992 -2.21417977 1585.4265
## 17          region_EuropeYes  0.035887997 0.24776273  0.14484825 1597.8287
## 18         region_OceaniaYes -0.124661350 0.33039352 -0.37731173 1600.4312
## 19            multicentreYes -0.353441145 0.18536918 -1.90668779 1589.0634
## 20 primary_purposePrevention -0.427248647 0.38472816 -1.11052087 1600.1633
## 21  primary_purposeTreatment -0.688233028 0.28845954 -2.38589106 1600.5377
## 22  sponsor_typeInvestigator -0.449343969 0.33296184 -1.34953595  356.8947
## 23  sponsor_typeNon industry -0.369521068 0.19801429 -1.86613332 1290.4162
## 24               sample_size  0.834435791 0.08238432 10.12857553 1560.1915
## 25                vaccineYes -0.274335663 0.67531644 -0.40623276 1600.4876
## 26           conventionalYes  0.543418138 0.66247970  0.82027893 1599.6400
## 27            traditionalYes  1.327909251 0.65466071  2.02839309 1601.1526
##         p.value
## 1  7.278258e-01
## 2  1.456916e-04
## 3  3.155793e-03
## 4  3.433111e-03
## 5  1.567791e-05
## 6  2.781746e-01
## 7  3.086927e-04
## 8  2.789913e-03
## 9  9.891292e-01
## 10 5.244985e-03
## 11 1.388827e-01
## 12 8.527034e-01
## 13 1.594000e-01
## 14 5.683192e-03
## 15 9.217179e-01
## 16 2.695823e-02
## 17 8.848490e-01
## 18 7.059920e-01
## 19 5.674123e-02
## 20 2.669415e-01
## 21 1.715347e-02
## 22 1.780203e-01
## 23 6.224943e-02
## 24 0.000000e+00
## 25 6.846259e-01
## 26 4.121793e-01
## 27 4.268538e-02
## 
## $randomisation
##                         term    estimate std.error   statistic        df
## 1                (Intercept) -0.77791775 0.7431457 -1.04679031 1594.4882
## 2                  covidTRUE  0.47299023 0.1468621  3.22064174 1597.3922
## 3      source_registryCT.gov -0.79706700 0.3647786 -2.18507081 1600.9901
## 4        source_registryCTRI -0.97688208 0.4114559 -2.37420821 1594.3545
## 5       source_registryEUCTR -1.19851986 0.4346808 -2.75724163 1601.0225
## 6        source_registryIRCT -0.51295104 0.4147741 -1.23669966 1596.6432
## 7        source_registryJPRN -1.37094423 0.4820941 -2.84372718 1586.5883
## 8       source_registryOther -0.94062843 0.4356893 -2.15894330 1601.5228
## 9         phase_cleanPhase 2  0.01687318 0.2178745  0.07744450 1588.3124
## 10        phase_cleanPhase 3  0.90141181 0.2729890  3.30200796 1441.7912
## 11        phase_cleanPhase 4  0.76078839 0.3017254  2.52145979 1549.7167
## 12      phase_cleanUndefined  0.03308733 0.2629163  0.12584740 1593.2046
## 13          region_AfricaYes  0.20580646 0.3378080  0.60924100 1601.2102
## 14       region_N_AmericaYes -0.83568882 0.1935904 -4.31678934 1590.1850
## 15       region_L_AmericaYes  0.31546991 0.2635272  1.19710583 1598.8803
## 16            region_AsiaYes -0.44884986 0.1951778 -2.29969779 1592.2280
## 17          region_EuropeYes -0.39133976 0.2249493 -1.73967957 1597.9718
## 18         region_OceaniaYes -0.22094743 0.3120843 -0.70797352 1600.6668
## 19            multicentreYes -0.22546972 0.1698297 -1.32762224 1593.0547
## 20 primary_purposePrevention -0.52012954 0.3331818 -1.56109818 1600.0354
## 21  primary_purposeTreatment -0.47155619 0.2543431 -1.85401597 1598.5342
## 22  sponsor_typeInvestigator -0.34023255 0.3147573 -1.08093613  384.8753
## 23  sponsor_typeNon industry -0.41199108 0.1830437 -2.25077954 1357.6037
## 24               sample_size  0.82554355 0.0760372 10.85710091 1574.3164
## 25                vaccineYes -1.11869495 0.5581184 -2.00440426 1600.6558
## 26           conventionalYes  0.01356544 0.5116842  0.02651135 1600.1972
## 27            traditionalYes  0.74026928 0.4858899  1.52353299 1601.2040
##         p.value
## 1  2.953550e-01
## 2  1.304768e-03
## 3  2.902819e-02
## 4  1.770445e-02
## 5  5.895330e-03
## 6  2.163805e-01
## 7  4.516075e-03
## 8  3.100268e-02
## 9  9.382797e-01
## 10 9.834478e-04
## 11 1.178648e-02
## 12 8.998686e-01
## 13 5.424511e-01
## 14 1.680856e-05
## 15 2.314428e-01
## 16 2.159420e-02
## 17 8.210784e-02
## 18 4.790648e-01
## 19 1.844931e-01
## 20 1.186983e-01
## 21 6.392096e-02
## 22 2.804024e-01
## 23 2.455897e-02
## 24 0.000000e+00
## 25 4.519496e-02
## 26 9.788528e-01
## 27 1.278229e-01
## 
## $blinding
##                         term    estimate  std.error   statistic       df
## 1                (Intercept) -0.98388666 0.52496003 -1.87421252 1600.575
## 2                  covidTRUE -0.32288357 0.11998295 -2.69107889 1601.841
## 3      source_registryCT.gov  0.48370794 0.26082150  1.85455545 1601.712
## 4        source_registryCTRI  0.07103238 0.29808919  0.23829237 1599.308
## 5       source_registryEUCTR  0.27391694 0.32307750  0.84783664 1601.780
## 6        source_registryIRCT  1.15731317 0.29065028  3.98180649 1601.576
## 7        source_registryJPRN -0.78976801 0.42264922 -1.86861343 1599.118
## 8       source_registryOther  0.28480634 0.32120703  0.88667530 1601.793
## 9         phase_cleanPhase 2  0.08308180 0.19756945  0.42051948 1600.324
## 10        phase_cleanPhase 3  0.21934709 0.21851818  1.00379330 1600.535
## 11        phase_cleanPhase 4 -0.12207191 0.23873740 -0.51132295 1600.373
## 12      phase_cleanUndefined  0.37764738 0.22816573  1.65514503 1599.719
## 13          region_AfricaYes  0.04741161 0.22748126  0.20841984 1601.841
## 14       region_N_AmericaYes -0.21361914 0.15369021 -1.38993324 1600.689
## 15       region_L_AmericaYes  0.61687622 0.18607173  3.31526032 1601.801
## 16            region_AsiaYes -0.35333982 0.15452359 -2.28663992 1600.966
## 17          region_EuropeYes -0.38776147 0.17739629 -2.18584875 1601.668
## 18         region_OceaniaYes -0.00532621 0.25061832 -0.02125228 1601.841
## 19            multicentreYes  0.01803151 0.13559170  0.13298391 1598.748
## 20 primary_purposePrevention  0.29482269 0.26031212  1.13257380 1601.401
## 21  primary_purposeTreatment -0.36392841 0.20741440 -1.75459568 1601.394
## 22  sponsor_typeInvestigator -0.18870460 0.24901537 -0.75780305 1394.104
## 23  sponsor_typeNon industry -0.60670810 0.14922794 -4.06564674 1576.597
## 24               sample_size  0.31469309 0.05168821  6.08829596 1594.497
## 25                vaccineYes -0.27690898 0.41795170 -0.66253822 1600.980
## 26           conventionalYes -0.16509395 0.34300267 -0.48131973 1600.888
## 27            traditionalYes  0.20211822 0.31753185  0.63652896 1601.455
##         p.value
## 1  6.108317e-02
## 2  7.196208e-03
## 3  6.384341e-02
## 4  8.116849e-01
## 5  3.966556e-01
## 6  7.145597e-05
## 7  6.185933e-02
## 8  3.753868e-01
## 9  6.741625e-01
## 10 3.156300e-01
## 11 6.091955e-01
## 12 9.809123e-02
## 13 8.349277e-01
## 14 1.647424e-01
## 15 9.360371e-04
## 16 2.234689e-02
## 17 2.897104e-02
## 18 9.830471e-01
## 19 8.942228e-01
## 20 2.575628e-01
## 21 7.951973e-02
## 22 4.486970e-01
## 23 5.026141e-05
## 24 1.427003e-09
## 25 5.077217e-01
## 26 6.303551e-01
## 27 5.245227e-01
## 
## $prospective
##                         term    estimate  std.error  statistic        df
## 1                (Intercept)  0.71258425 0.57509983  1.2390618 1599.8737
## 2                  covidTRUE -0.14774011 0.13188767 -1.1201965 1601.0682
## 3      source_registryCT.gov -0.22647422 0.25674079 -0.8821123 1601.4474
## 4        source_registryCTRI  3.53257295 0.75030551  4.7081794 1600.8059
## 5       source_registryEUCTR -0.13371242 0.34513908 -0.3874160 1601.8257
## 6        source_registryIRCT -0.63226526 0.28485937 -2.2195698 1599.3262
## 7        source_registryJPRN -0.81686212 0.38459597 -2.1239487 1597.4909
## 8       source_registryOther -0.46194700 0.32408699 -1.4253796 1601.8025
## 9         phase_cleanPhase 2  0.27074235 0.21274668  1.2726043 1595.9858
## 10        phase_cleanPhase 3 -0.12064423 0.23648010 -0.5101665 1588.9664
## 11        phase_cleanPhase 4  0.05703499 0.25424262  0.2243329 1578.6212
## 12      phase_cleanUndefined -0.08155917 0.24380493 -0.3345263 1594.0350
## 13          region_AfricaYes  0.61239293 0.29428346  2.0809628 1601.3989
## 14       region_N_AmericaYes  0.73492863 0.19492825  3.7702520 1601.5679
## 15       region_L_AmericaYes -0.24873193 0.22524619 -1.1042670 1601.3458
## 16            region_AsiaYes  0.10622445 0.19806108  0.5363217 1600.7600
## 17          region_EuropeYes  0.30630468 0.21849226  1.4019018 1601.8218
## 18         region_OceaniaYes  2.75812481 0.73697934  3.7424724 1601.8407
## 19            multicentreYes  0.43682705 0.14492318  3.0141973 1599.9605
## 20 primary_purposePrevention -0.25774904 0.29236225 -0.8816085 1601.5769
## 21  primary_purposeTreatment -0.21431002 0.23518810 -0.9112281 1600.5932
## 22  sponsor_typeInvestigator  0.18529664 0.31758872  0.5834484  558.6916
## 23  sponsor_typeNon industry -0.16974099 0.16700650 -1.0163736 1594.9972
## 24               sample_size  0.07110458 0.05526889  1.2865208 1571.5518
## 25                vaccineYes  0.29697460 0.48326207  0.6145208 1601.6674
## 26           conventionalYes -0.23629320 0.37886094 -0.6236938 1601.7313
## 27            traditionalYes -0.25249890 0.34250672 -0.7372086 1601.8407
##         p.value
## 1  2.155043e-01
## 2  2.627980e-01
## 3  3.778484e-01
## 4  2.715148e-06
## 5  6.984997e-01
## 6  2.658780e-02
## 7  3.382770e-02
## 8  1.542423e-01
## 9  2.033438e-01
## 10 6.100056e-01
## 11 8.225273e-01
## 12 7.380264e-01
## 13 3.759589e-02
## 14 1.690058e-04
## 15 2.696432e-01
## 16 5.918108e-01
## 17 1.611383e-01
## 18 1.886515e-04
## 19 2.617114e-03
## 20 3.781208e-01
## 21 3.623124e-01
## 22 5.598269e-01
## 23 3.096057e-01
## 24 1.984508e-01
## 25 5.389585e-01
## 26 5.329175e-01
## 27 4.611035e-01
# Pooled coefficient table for every outcome
sum_main_direct_mice <- lapply(main_direct_models_mice, function(x) summary(x$pooled_fit))

# OR and 95% CI (Bonferroni corrected) for the covid coefficient.
# z is defined elsewhere in this file; presumably the Bonferroni-adjusted
# normal quantile -- TODO confirm.
# The covid row and the estimate / std.error / p.value columns are looked up
# by name instead of by position (previously row 2, columns 2, 3 and 6), so a
# change in covariate order or in the summary layout cannot silently pick the
# wrong cell.
pool_OR_main_direct_mice <- lapply(sum_main_direct_mice, function(x) {
  covid_row <- x[x$term == "covidTRUE", , drop = FALSE]
  stopifnot(nrow(covid_row) == 1L)
  log_or <- covid_row$estimate
  se <- covid_row$std.error
  # exponentiate estimate and interval bounds; the p-value is appended as-is
  cbind(
    exp(cbind(log_or, log_or - z * se, log_or + z * se, deparse.level = 0)),
    covid_row$p.value,
    deparse.level = 0
  )
})

# Stack the per-outcome rows into one data frame and label it
pool_OR_main_direct_mice <- do.call(rbind.data.frame, pool_OR_main_direct_mice)
colnames(pool_OR_main_direct_mice) <- c("Estimate", "Lower CI", "Upper CI", "P-value")
pool_OR_main_direct_mice$Analysis <- "Main direct (4)"

fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data

# Density plots of main_mice for each of the three imputed variables.
# NOTE(review): phase_clean and sponsor_type appear to be factors (character
# columns are converted with as.factor at import) -- confirm that a density
# plot is meaningful for them.
mice::densityplot(main_mice, ~ phase_clean)

mice::densityplot(main_mice, ~ sponsor_type)

mice::densityplot(main_mice, ~ sample_size)

7.5.2.1.2 Compare complete case analysis with MICE analysis for each outcome
7.5.2.1.2.1 Control arm
# Complete-case logistic regression of control_arm on the adjustment set,
# fitted to main_dataset (rows with missing values are dropped by glm; see the
# "observations deleted due to missingness" line in the output).
# The call is left exactly as written because summary() echoes it.
summary(glm(as.formula(paste("control_arm", "~", paste(main_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(main_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.2300   0.1641   0.3709   0.6190   2.2914  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.34524    0.94605  -0.365  0.71517    
## covidTRUE                  0.64700    0.16590   3.900 9.62e-05 ***
## source_registryCT.gov     -1.43398    0.47798  -3.000  0.00270 ** 
## source_registryCTRI       -1.56766    0.52492  -2.986  0.00282 ** 
## source_registryEUCTR      -2.35595    0.54439  -4.328 1.51e-05 ***
## source_registryIRCT       -0.60915    0.55104  -1.105  0.26896    
## source_registryJPRN       -1.28158    0.77065  -1.663  0.09631 .  
## source_registryOther      -1.50499    0.54654  -2.754  0.00589 ** 
## phase_cleanPhase 2         0.02835    0.23797   0.119  0.90518    
## phase_cleanPhase 3         0.85186    0.30478   2.795  0.00519 ** 
## phase_cleanPhase 4         0.38919    0.32689   1.191  0.23383    
## phase_cleanUndefined      -0.01534    0.29509  -0.052  0.95854    
## region_AfricaYes           0.52325    0.40560   1.290  0.19703    
## region_N_AmericaYes       -0.59577    0.20700  -2.878  0.00400 ** 
## region_L_AmericaYes        0.10826    0.28337   0.382  0.70242    
## region_AsiaYes            -0.48605    0.20844  -2.332  0.01971 *  
## region_EuropeYes          -0.00945    0.25339  -0.037  0.97025    
## region_OceaniaYes         -0.18176    0.33384  -0.544  0.58614    
## multicentreYes            -0.28371    0.19159  -1.481  0.13865    
## primary_purposePrevention -0.25428    0.40867  -0.622  0.53379    
## primary_purposeTreatment  -0.73610    0.29646  -2.483  0.01303 *  
## sponsor_typeInvestigator  -0.43063    0.33325  -1.292  0.19628    
## sponsor_typeNon industry  -0.35873    0.19983  -1.795  0.07263 .  
## sample_size                0.84054    0.08373  10.038  < 2e-16 ***
## vaccineYes                -0.39679    0.69881  -0.568  0.57016    
## conventionalYes            0.57009    0.68070   0.838  0.40231    
## traditionalYes             1.31550    0.66888   1.967  0.04922 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1497.6  on 1587  degrees of freedom
## Residual deviance: 1189.8  on 1561  degrees of freedom
##   (74 observations deleted due to missingness)
## AIC: 1243.8
## 
## Number of Fisher Scoring iterations: 6
# MICE counterpart for control_arm: the same model is fitted within each
# imputed dataset of main_mice and the fits are then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("control_arm", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Show the pooled table first, then the coefficient summary
pooled_fit[["pooled"]]
summary(pooled_fit)
7.5.2.1.2.2 Randomisation
# Complete-case logistic regression of randomisation on the adjustment set,
# fitted to main_dataset (rows with missing values are dropped by glm; see the
# "observations deleted due to missingness" line in the output).
# The call is left exactly as written because summary() echoes it.
summary(glm(as.formula(paste("randomisation", "~", paste(main_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(main_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.8363   0.1173   0.4368   0.6938   2.3118  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.97122    0.76955  -1.262 0.206930    
## covidTRUE                  0.52597    0.14976   3.512 0.000445 ***
## source_registryCT.gov     -0.82466    0.36508  -2.259 0.023892 *  
## source_registryCTRI       -0.99723    0.41228  -2.419 0.015571 *  
## source_registryEUCTR      -1.24284    0.43653  -2.847 0.004412 ** 
## source_registryIRCT       -0.52090    0.41518  -1.255 0.209607    
## source_registryJPRN       -0.80277    0.67318  -1.193 0.233063    
## source_registryOther      -0.72117    0.44822  -1.609 0.107625    
## phase_cleanPhase 2         0.03378    0.22034   0.153 0.878166    
## phase_cleanPhase 3         0.90101    0.27411   3.287 0.001012 ** 
## phase_cleanPhase 4         0.67994    0.30357   2.240 0.025101 *  
## phase_cleanUndefined       0.05189    0.26561   0.195 0.845098    
## region_AfricaYes           0.16886    0.33918   0.498 0.618601    
## region_N_AmericaYes       -0.83954    0.19586  -4.286 1.82e-05 ***
## region_L_AmericaYes        0.39010    0.27209   1.434 0.151645    
## region_AsiaYes            -0.45841    0.19737  -2.323 0.020200 *  
## region_EuropeYes          -0.36571    0.23085  -1.584 0.113150    
## region_OceaniaYes         -0.31150    0.31586  -0.986 0.324046    
## multicentreYes            -0.16751    0.17477  -0.958 0.337849    
## primary_purposePrevention -0.43757    0.34832  -1.256 0.209032    
## primary_purposeTreatment  -0.55979    0.26182  -2.138 0.032514 *  
## sponsor_typeInvestigator  -0.31801    0.31406  -1.013 0.311262    
## sponsor_typeNon industry  -0.39483    0.18461  -2.139 0.032460 *  
## sample_size                0.82826    0.07714  10.737  < 2e-16 ***
## vaccineYes                -1.03897    0.59606  -1.743 0.081325 .  
## conventionalYes            0.22692    0.54359   0.417 0.676346    
## traditionalYes             0.86484    0.51526   1.678 0.093259 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1724.2  on 1587  degrees of freedom
## Residual deviance: 1384.1  on 1561  degrees of freedom
##   (74 observations deleted due to missingness)
## AIC: 1438.1
## 
## Number of Fisher Scoring iterations: 5
# MICE counterpart for randomisation: the same model is fitted within each
# imputed dataset of main_mice and the fits are then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("randomisation", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Show the pooled table first, then the coefficient summary
pooled_fit[["pooled"]]
summary(pooled_fit)
7.5.2.1.2.3 Blinding
# Complete-case logistic regression of blinding on the adjustment set,
# fitted to main_dataset (rows with missing values are dropped by glm; see the
# "observations deleted due to missingness" line in the output).
# The call is left exactly as written because summary() echoes it.
summary(glm(as.formula(paste("blinding", "~", paste(main_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(main_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4562  -1.0479  -0.7021   1.1217   1.9151  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.985183   0.529629  -1.860 0.062866 .  
## covidTRUE                 -0.319825   0.120826  -2.647 0.008121 ** 
## source_registryCT.gov      0.471870   0.261535   1.804 0.071195 .  
## source_registryCTRI        0.043744   0.299956   0.146 0.884051    
## source_registryEUCTR       0.278409   0.323742   0.860 0.389804    
## source_registryIRCT        1.150588   0.291532   3.947 7.92e-05 ***
## source_registryJPRN        0.923499   0.552734   1.671 0.094765 .  
## source_registryOther       0.367803   0.323775   1.136 0.255965    
## phase_cleanPhase 2         0.141445   0.198823   0.711 0.476829    
## phase_cleanPhase 3         0.253202   0.219806   1.152 0.249349    
## phase_cleanPhase 4        -0.080269   0.240979  -0.333 0.739063    
## phase_cleanUndefined       0.422286   0.228867   1.845 0.065020 .  
## region_AfricaYes           0.028242   0.227861   0.124 0.901359    
## region_N_AmericaYes       -0.221598   0.153847  -1.440 0.149760    
## region_L_AmericaYes        0.632431   0.187540   3.372 0.000746 ***
## region_AsiaYes            -0.366039   0.154933  -2.363 0.018149 *  
## region_EuropeYes          -0.400955   0.178577  -2.245 0.024750 *  
## region_OceaniaYes         -0.007944   0.250653  -0.032 0.974716    
## multicentreYes            -0.014139   0.137642  -0.103 0.918185    
## primary_purposePrevention  0.390406   0.264763   1.475 0.140333    
## primary_purposeTreatment  -0.312637   0.210117  -1.488 0.136772    
## sponsor_typeInvestigator  -0.137036   0.255614  -0.536 0.591888    
## sponsor_typeNon industry  -0.611411   0.150362  -4.066 4.78e-05 ***
## sample_size                0.315180   0.052056   6.055 1.41e-09 ***
## vaccineYes                -0.256717   0.432746  -0.593 0.553029    
## conventionalYes           -0.228963   0.351349  -0.652 0.514615    
## traditionalYes             0.169326   0.323662   0.523 0.600864    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2194.1  on 1587  degrees of freedom
## Residual deviance: 2022.7  on 1561  degrees of freedom
##   (74 observations deleted due to missingness)
## AIC: 2076.7
## 
## Number of Fisher Scoring iterations: 4
# MICE counterpart for blinding: the same model is fitted within each
# imputed dataset of main_mice and the fits are then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("blinding", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Show the pooled table first, then the coefficient summary
pooled_fit[["pooled"]]
summary(pooled_fit)
7.5.2.1.2.4 Prospective registration
# Complete-case logistic regression of prospective on the adjustment set,
# fitted to main_dataset (rows with missing values are dropped by glm; see the
# "observations deleted due to missingness" line in the output).
# The call is left exactly as written because summary() echoes it.
summary(glm(as.formula(paste("prospective", "~", paste(main_direct_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(main_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0200  -1.0575   0.5725   0.8440   1.4896  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                0.75616    0.58158   1.300 0.193544    
## covidTRUE                 -0.21664    0.13383  -1.619 0.105506    
## source_registryCT.gov     -0.22929    0.25765  -0.890 0.373507    
## source_registryCTRI        3.47071    0.75040   4.625 3.74e-06 ***
## source_registryEUCTR      -0.09829    0.34659  -0.284 0.776731    
## source_registryIRCT       -0.65173    0.28610  -2.278 0.022727 *  
## source_registryJPRN        0.21656    0.62886   0.344 0.730570    
## source_registryOther      -0.53804    0.32748  -1.643 0.100390    
## phase_cleanPhase 2         0.30294    0.21505   1.409 0.158922    
## phase_cleanPhase 3        -0.05856    0.23852  -0.246 0.806062    
## phase_cleanPhase 4         0.08626    0.25639   0.336 0.736544    
## phase_cleanUndefined      -0.04768    0.24434  -0.195 0.845271    
## region_AfricaYes           0.58034    0.29331   1.979 0.047862 *  
## region_N_AmericaYes        0.70411    0.19596   3.593 0.000327 ***
## region_L_AmericaYes       -0.27907    0.22678  -1.231 0.218485    
## region_AsiaYes             0.07420    0.19848   0.374 0.708521    
## region_EuropeYes           0.23831    0.22081   1.079 0.280462    
## region_OceaniaYes          2.79560    0.73834   3.786 0.000153 ***
## multicentreYes             0.38960    0.14842   2.625 0.008665 ** 
## primary_purposePrevention -0.22668    0.29761  -0.762 0.446265    
## primary_purposeTreatment  -0.12162    0.23794  -0.511 0.609252    
## sponsor_typeInvestigator   0.29738    0.33727   0.882 0.377920    
## sponsor_typeNon industry  -0.17980    0.17019  -1.056 0.290776    
## sample_size                0.08214    0.05590   1.469 0.141734    
## vaccineYes                 0.32409    0.50698   0.639 0.522649    
## conventionalYes           -0.34526    0.38788  -0.890 0.373402    
## traditionalYes            -0.32613    0.34693  -0.940 0.347192    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1891.4  on 1587  degrees of freedom
## Residual deviance: 1632.3  on 1561  degrees of freedom
##   (74 observations deleted due to missingness)
## AIC: 1686.3
## 
## Number of Fisher Scoring iterations: 6
# MICE counterpart for prospective: the same model is fitted within each
# imputed dataset of main_mice and the fits are then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("prospective", "~", paste(main_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Show the pooled table first, then the coefficient summary
pooled_fit[["pooled"]]
summary(pooled_fit)

7.5.2.2 Indication-matched dataset

# generate start_year from start_date
indication_dataset$start_year <- as.numeric(substr(indication_dataset$start_date, 1, 4))

# create a new dataset where some variables are dropped
indication_for_mice <- indication_dataset %>% select(-TrialID, -start_date, -study_arm)

indication_for_mice %>% map(class)
## $control_arm
## [1] "factor"
## 
## $randomisation
## [1] "factor"
## 
## $blinding
## [1] "factor"
## 
## $prospective
## [1] "factor"
## 
## $source_registry
## [1] "factor"
## 
## $phase_clean
## [1] "factor"
## 
## $region_Africa
## [1] "factor"
## 
## $region_N_America
## [1] "factor"
## 
## $region_L_America
## [1] "factor"
## 
## $region_Asia
## [1] "factor"
## 
## $region_Europe
## [1] "factor"
## 
## $region_Oceania
## [1] "factor"
## 
## $multicentre
## [1] "factor"
## 
## $primary_purpose
## [1] "factor"
## 
## $sponsor_type
## [1] "factor"
## 
## $sample_size
## [1] "numeric"
## 
## $vaccine
## [1] "factor"
## 
## $conventional
## [1] "factor"
## 
## $traditional
## [1] "factor"
## 
## $subject_blind
## [1] "factor"
## 
## $caregiver_blind
## [1] "factor"
## 
## $investigator_blind
## [1] "factor"
## 
## $outcome_blind
## [1] "factor"
## 
## $analyst_blind
## [1] "factor"
## 
## $covid
## [1] "logical"
## 
## $start_year
## [1] "numeric"

Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.

# Fix the RNG seed for the imputation steps that follow
set.seed(5)

# Dry run (maxit = 0): sets up the mids object, including the default
# predictor matrix, without performing any imputation iterations.
# Argument and element names are written out in full (printFlag,
# predictorMatrix) instead of relying on partial matching.
mice_in <- mice::mice(indication_for_mice, maxit = 0, printFlag = FALSE)
predictor_matrix <- mice_in$predictorMatrix
# Pairwise correlations between the outcomes and the adjustment variables.
# Every column is coerced with as.numeric (factors become their integer
# codes); vapply is used instead of sapply so the result is guaranteed to be
# a numeric matrix with one column per variable.
correlation_matrix <- round(
  cor(
    vapply(
      indication_for_mice[, c(myvars, indication_direct_adjustment)],
      as.numeric,
      numeric(nrow(indication_for_mice))
    ),
    use = "pairwise.complete.obs"
  ),
  2
)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# reverse the level order of Var2 so the y axis runs in the opposite
# direction to the x axis
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)

# labels_plot <- levels(melted_correlation_matrix[,1])
# labels_plot <- labels[levels(melted_correlation_matrix[,1]),]$short
# axis labels: variable names with underscores removed, named by the original
# variable name
labels_plot <- vapply(
  levels(melted_correlation_matrix[, 1]),
  function(x) gsub("_", "", x),
  character(1)
)

# Heatmap of the correlation matrix: purple = negative, green = positive,
# white = zero or NA (the blanked-out triangle).
# `limits` is spelled out in full; the previous `limit` only worked through
# partial argument matching.
ggplot(data = melted_correlation_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4", high = "green3", mid = "white", midpoint = 0,
    na.value = "white", limits = c(-1, 1), name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    # panel.background = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1, title.position = "top", title.hjust = 0.5))

# quickpred: quick selection procedure of predictors
# select predictors according to data relations with a minimum correlation of ρ = 0.25
# NOTE(review): this call has no maxit = 0, so it performs a full default
# imputation just to display the matrix. It is left in place because it
# presumably advances the RNG state that the later mice() call inherits from
# set.seed(5) -- confirm before removing.
# Argument and element names are written out in full (predictorMatrix,
# printFlag) instead of relying on partial matching.
mice_in2 <- mice::mice(
  indication_for_mice,
  predictorMatrix = mice::quickpred(indication_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix

Modify the predictor matrix such that only phase, sample size and sponsor type are imputed, using all variables.

# Use covid as a predictor everywhere; its own row needs no special handling
# because it is zeroed together with all other non-target rows below.
predictor_matrix[, "covid"] <- 1
# Zero every row except those of the three variables to be imputed, so that
# no predictors are assigned to any other variable.
predictor_matrix[setdiff(row.names(predictor_matrix), c("phase_clean", "sample_size", "sponsor_type")), ] <- 0

Generate 10 imputed datasets using chained equations (using package mice).

# Impute the indication-matched data with the restricted predictor matrix,
# producing m = 10 imputed datasets. Argument names are written out in full
# (predictorMatrix, printFlag) instead of relying on partial matching.
indication_mice <- mice::mice(
  indication_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# List the components and class of the resulting object
attributes(indication_mice)
## $names
##  [1] "data"            "imp"             "m"               "where"          
##  [5] "blocks"          "call"            "nmis"            "method"         
##  [9] "predictorMatrix" "visitSequence"   "formulas"        "post"           
## [13] "blots"           "ignore"          "seed"            "iteration"      
## [17] "lastSeedValue"   "chainMean"       "chainVar"        "loggedEvents"   
## [21] "version"         "date"           
## 
## $class
## [1] "mids"

Original data:

# First rows of the data as stored in the imputation object.
head(indication_mice$data)

Imputed datasets:

# First rows of the imputed values, shown separately for every variable.
lapply(indication_mice$imp, head)
## $control_arm
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $randomisation
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $blinding
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $prospective
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $source_registry
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $phase_clean
##             1         2       3         4       5         6       7       8
## 233   Phase 2   Phase 2 Phase 2   Phase 4 Phase 3   Phase 3 Phase 3 Phase 2
## 234   Phase 2   Phase 4 Phase 2   Phase 2 Phase 2   Phase 2 Phase 2 Phase 2
## 235   Phase 3   Phase 2 Phase 2   Phase 4 Phase 2   Phase 2 Phase 3 Phase 4
## 663   Phase 2   Phase 3 Phase 4   Phase 3 Phase 4   Phase 4 Phase 2 Phase 3
## 666   Phase 3   Phase 4 Phase 3 Undefined Phase 3   Phase 4 Phase 2 Phase 3
## 686 Undefined Undefined Phase 4   Phase 3 Phase 3 Undefined Phase 3 Phase 3
##             9      10
## 233   Phase 2 Phase 2
## 234   Phase 2 Phase 1
## 235   Phase 3 Phase 3
## 663   Phase 2 Phase 3
## 666 Undefined Phase 4
## 686   Phase 2 Phase 2
## 
## $region_Africa
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_N_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_L_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Asia
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Europe
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Oceania
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $multicentre
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $primary_purpose
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $sponsor_type
##                1            2            3            4            5
## 121 Non industry Non industry Non industry Non industry Non industry
## 179     Industry Non industry     Industry     Industry Non industry
## 224 Non industry     Industry Non industry     Industry Non industry
## 686 Investigator     Industry Non industry     Industry Non industry
## 689     Industry Non industry Non industry Non industry     Industry
## 690 Non industry     Industry Non industry Non industry     Industry
##                6            7            8            9           10
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Non industry     Industry Non industry Non industry Non industry
## 224 Non industry Investigator Non industry     Industry Investigator
## 686 Non industry Non industry     Industry Non industry     Industry
## 689 Non industry Non industry Non industry     Industry     Industry
## 690 Non industry Non industry     Industry     Industry     Industry
## 
## $sample_size
##             1        2        3        4        5        6        7        8
## 350  4.094345 3.688879 6.719013 5.598422 6.214608 5.634790 6.784457 6.551080
## 679  4.787492 2.995732 4.634729 3.583519 4.007333 3.401197 4.905275 3.091042
## 1640 4.007333 3.583519 4.564348 3.688879 4.094345 4.828314 3.688879 2.995732
## 1642 4.787492 5.298317 6.907755 4.382027 4.094345 4.605170 3.637586 4.867534
##             9       10
## 350  5.703782 5.192957
## 679  3.688879 4.094345
## 1640 2.708050 4.158883
## 1642 4.941642 5.493061
## 
## $vaccine
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $conventional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $traditional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $subject_blind
##     1   2   3  4  5   6   7   8   9  10
## 1  No Yes  No No No Yes Yes  No Yes Yes
## 3  No  No Yes No No  No Yes Yes Yes  No
## 5 Yes  No  No No No  No Yes Yes  No  No
## 6  No Yes Yes No No  No Yes  No Yes Yes
## 7  No  No  No No No Yes  No  No  No  No
## 8  No  No  No No No  No Yes  No Yes  No
## 
## $caregiver_blind
##     1   2   3   4   5  6   7   8  9 10
## 1  No  No  No  No Yes No  No Yes No No
## 3  No  No  No  No  No No  No Yes No No
## 5 Yes  No  No  No  No No  No  No No No
## 6  No  No  No  No  No No Yes  No No No
## 7  No Yes Yes  No  No No  No  No No No
## 8 Yes  No  No Yes Yes No  No  No No No
## 
## $investigator_blind
##     1   2   3   4   5   6  7   8   9  10
## 1  No Yes  No  No Yes  No No Yes Yes Yes
## 3  No  No Yes  No Yes Yes No  No Yes  No
## 5  No  No Yes  No  No Yes No  No Yes Yes
## 6  No  No  No Yes  No Yes No  No  No Yes
## 7 Yes  No  No  No Yes  No No  No  No  No
## 8  No  No  No  No Yes Yes No  No Yes  No
## 
## $outcome_blind
##    1   2   3   4  5   6  7   8   9 10
## 1 No  No  No Yes No Yes No Yes Yes No
## 3 No  No  No  No No  No No  No  No No
## 5 No  No Yes  No No  No No  No Yes No
## 6 No Yes  No  No No  No No  No  No No
## 7 No  No Yes Yes No  No No  No Yes No
## 8 No  No  No  No No  No No  No  No No
## 
## $analyst_blind
##    1  2  3  4  5  6  7  8  9 10
## 1 No No No No No No No No No No
## 3 No No No No No No No No No No
## 5 No No No No No No No No No No
## 6 No No No No No No No No No No
## 7 No No No No No No No No No No
## 8 No No No No No No No No No No
## 
## $covid
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $start_year
##       1    2    3    4    5    6    7    8    9   10
## 32 2020 2020 2019 2018 2020 2020 2020 2020 2016 2020
## 36 2012 2016 2017 2020 2020 2016 2020 2018 2020 2019
## 41 2020 2016 2019 2019 2020 2019 2020 2016 2016 2020
## 63 2020 2016 2017 2019 2017 2020 2020 2020 2020 2019
## 66 2012 2020 2016 2018 2020 2020 2020 2020 2016 2020
## 72 2020 2016 2017 2016 2020 2019 2020 2020 2020 2020
# In order to get the third imputed data set, use the complete() function

# c3 <- complete(imp, 3) 
# md.pattern(c3)

# c.long <- complete(imp, "long") # "broad"

Only impute the three variables listed above.

# Start from the imputation methods of the previous run and blank the method
# of every variable except the three that should be imputed, so that only
# those three are imputed in the next run.
# Variables are selected by name instead of the former hard-coded position
# (19:length(method_vector)), which silently depended on the column order of
# the data and would index backwards with fewer than 19 columns.
method_vector <- indication_mice$method
method_vector[
  !(names(method_vector) %in% c("phase_clean", "sample_size", "sponsor_type"))
] <- ""

# methods(mice)

Generate 10 imputed datasets using the updated method vector.

# Re-run the imputation (m = 10) with the restricted method vector and the
# restricted predictor matrix. Argument names are given in full rather than
# through partial matching (`pred`, `print`).
indication_mice <- mice::mice(
  indication_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 150
# Plot the imputation object to inspect the chains across iterations.
plot(indication_mice)

Check that there is no trend with further iterations and that the lines mix.

# Continue the existing imputation for 40 further iterations and plot the
# result, to check for trends and mixing of the chains.
# `printFlag` is spelled out instead of the partially matched `print`.
indication_mice_40 <- mice::mice.mids(
  indication_mice,
  maxit = 40,
  printFlag = FALSE
)
plot(indication_mice_40)

Plot of observed (blue) and imputed (red) phase:

# Strip plot of phase_clean against the imputation number (.imp).
mice::stripplot(indication_mice, phase_clean ~ .imp, pch = 20, cex = 2)

# Under MCAR, univariate distributions of the observed and imputed data are expected to be identical. Under MAR, they can be different, both in location and spread, but their multivariate distribution is assumed to be identical.
# Strip plots for the imputation object as a whole (no formula supplied).
mice::stripplot(indication_mice)

7.5.2.2.1 Analysis

Logistic regression analysis on the multiply imputed data.

# For every outcome in `myvars`: fit a logistic regression of the outcome on
# the direct-effect adjustment set in each imputed dataset, then pool the
# fits. Each list element holds the per-imputation fits and the pooled fit.
indication_direct_models_mice <- lapply(myvars, function(outcome) {
  # Only the formula text is built here; as.formula() is still called inside
  # with() so that the formula is created where the imputed data are visible.
  formula_text <- paste(
    outcome, "~", paste(indication_direct_adjustment, collapse = "+")
  )
  fit <- with(
    indication_mice,
    glm(as.formula(formula_text), family = binomial(link = "logit"))
  )
  list("fit" = fit, "pooled_fit" = mice::pool(fit))
})

names(indication_direct_models_mice) <- myvars
# Print the pooled coefficient table of every outcome.
lapply(indication_direct_models_mice, function(model) summary(model$pooled_fit))
## $control_arm
##                         term    estimate  std.error   statistic       df
## 1                (Intercept) -0.44457936 0.68472653 -0.64928017 1593.064
## 2                  covidTRUE -0.79312207 0.19915981 -3.98233990 1600.216
## 3      source_registryCT.gov -1.30017243 0.49562668 -2.62328981 1600.137
## 4        source_registryCTRI -0.95435273 0.56883548 -1.67773067 1600.462
## 5       source_registryEUCTR -2.23460818 0.58194564 -3.83989158 1599.909
## 6        source_registryIRCT -0.59170126 0.56433545 -1.04849209 1599.866
## 7        source_registryJPRN -3.20584104 0.60645751 -5.28617588 1599.188
## 8       source_registryOther -1.43936808 0.60248929 -2.38903515 1599.633
## 9         phase_cleanPhase 2  0.87141484 0.27313916  3.19036946 1596.853
## 10        phase_cleanPhase 3  0.90214324 0.32042593  2.81545016 1573.425
## 11        phase_cleanPhase 4  0.16034556 0.30814163  0.52036320 1578.764
## 12      phase_cleanUndefined -0.11067291 0.31174235 -0.35501405 1599.644
## 13          region_AfricaYes  0.64783322 0.49408590  1.31117529 1600.414
## 14       region_N_AmericaYes -0.17891972 0.25403821 -0.70430242 1597.006
## 15       region_L_AmericaYes -0.04464200 0.33394166 -0.13368202 1600.073
## 16            region_AsiaYes -0.10862529 0.26691616 -0.40696406 1596.655
## 17          region_EuropeYes -0.01506037 0.28234915 -0.05333953 1599.029
## 18         region_OceaniaYes  0.32854311 0.51084523  0.64313630 1597.107
## 19            multicentreYes  0.04911306 0.20974009  0.23416151 1598.487
## 20 primary_purposePrevention  0.25450023 0.31394237  0.81065907 1599.662
## 21  primary_purposeTreatment  0.88079991 0.27722032  3.17725593 1599.821
## 22  sponsor_typeInvestigator  0.04286251 0.40943407  0.10468722 1597.602
## 23  sponsor_typeNon industry  0.13661701 0.22851966  0.59783484 1438.044
## 24               sample_size  0.69979352 0.08588239  8.14827716 1520.618
##         p.value
## 1  5.162509e-01
## 2  7.129996e-05
## 3  8.791258e-03
## 4  9.359494e-02
## 5  1.278882e-04
## 6  2.945703e-01
## 7  1.421266e-07
## 8  1.700797e-02
## 9  1.448518e-03
## 10 4.931611e-03
## 11 6.028834e-01
## 12 7.226259e-01
## 13 1.899866e-01
## 14 4.813471e-01
## 15 8.936708e-01
## 16 6.840889e-01
## 17 9.574680e-01
## 18 5.202280e-01
## 19 8.148896e-01
## 20 4.176821e-01
## 21 1.515154e-03
## 22 9.166371e-01
## 23 5.500443e-01
## 24 8.881784e-16
## 
## $randomisation
##                         term     estimate  std.error    statistic       df
## 1                (Intercept) -1.194287510 0.55858293 -2.138066620 1594.421
## 2                  covidTRUE -0.769149836 0.17009034 -4.522007797 1600.539
## 3      source_registryCT.gov -0.612548428 0.36375194 -1.683972966 1600.102
## 4        source_registryCTRI -0.201444360 0.44380492 -0.453902946 1600.296
## 5       source_registryEUCTR -0.952540575 0.45472681 -2.094753503 1600.011
## 6        source_registryIRCT -0.309759773 0.40564902 -0.763615240 1599.326
## 7        source_registryJPRN -2.327311916 0.49749370 -4.678073107 1597.755
## 8       source_registryOther -0.696934384 0.47698442 -1.461126102 1599.603
## 9         phase_cleanPhase 2  0.795970854 0.23952133  3.323173156 1589.318
## 10        phase_cleanPhase 3  0.848847462 0.27743937  3.059578194 1589.395
## 11        phase_cleanPhase 4  0.074697604 0.26496734  0.281912494 1599.551
## 12      phase_cleanUndefined -0.074763369 0.27447340 -0.272388398 1595.511
## 13          region_AfricaYes  0.828715576 0.44913021  1.845156624 1600.538
## 14       region_N_AmericaYes -0.533889178 0.22546417 -2.367955774 1597.564
## 15       region_L_AmericaYes  0.080502488 0.30388967  0.264906960 1600.179
## 16            region_AsiaYes -0.122130262 0.23962348 -0.509675691 1597.840
## 17          region_EuropeYes -0.269747792 0.24706545 -1.091807015 1599.148
## 18         region_OceaniaYes  0.364121351 0.45404099  0.801956997 1598.626
## 19            multicentreYes  0.290782656 0.17995588  1.615855258 1599.485
## 20 primary_purposePrevention  0.068676912 0.27026777  0.254106928 1599.662
## 21  primary_purposeTreatment  1.019558885 0.24326225  4.191192345 1600.299
## 22  sponsor_typeInvestigator  0.001789449 0.36841923  0.004857101 1598.718
## 23  sponsor_typeNon industry -0.051288989 0.20101615 -0.255148603 1511.554
## 24               sample_size  0.623848183 0.07300873  8.544843231 1561.302
##         p.value
## 1  3.266299e-02
## 2  6.576294e-06
## 3  9.238193e-02
## 4  6.499602e-01
## 5  3.634983e-02
## 6  4.452092e-01
## 7  3.139994e-06
## 8  1.441772e-01
## 9  9.102204e-04
## 10 2.253554e-03
## 11 7.780471e-01
## 12 7.853586e-01
## 13 6.519939e-02
## 14 1.800517e-02
## 15 7.911153e-01
## 16 6.103491e-01
## 17 2.750824e-01
## 18 4.226970e-01
## 19 1.063228e-01
## 20 7.994456e-01
## 21 2.926566e-05
## 22 9.961252e-01
## 23 7.986430e-01
## 24 0.000000e+00
## 
## $blinding
##                         term    estimate  std.error   statistic       df
## 1                (Intercept) -1.58908581 0.43696132 -3.63667384 1598.302
## 2                  covidTRUE -0.98732558 0.12412289 -7.95441959 1600.368
## 3      source_registryCT.gov  0.80313242 0.28597426  2.80840806 1600.766
## 4        source_registryCTRI  0.03450912 0.34628062  0.09965652 1600.668
## 5       source_registryEUCTR  0.25549607 0.34734831  0.73556158 1600.685
## 6        source_registryIRCT  1.10485101 0.31127316  3.54945797 1600.668
## 7        source_registryJPRN -0.21503829 0.44020449 -0.48849635 1597.068
## 8       source_registryOther -0.02627937 0.36488703 -0.07202055 1600.663
## 9         phase_cleanPhase 2  0.48806949 0.20823474  2.34384275 1599.830
## 10        phase_cleanPhase 3  0.11690108 0.22340582  0.52326785 1599.955
## 11        phase_cleanPhase 4 -0.47795748 0.23511757 -2.03284457 1599.331
## 12      phase_cleanUndefined -0.05468179 0.24372521 -0.22435837 1598.526
## 13          region_AfricaYes  0.36251480 0.23379322  1.55057877 1600.810
## 14       region_N_AmericaYes -0.22888238 0.15811427 -1.44757577 1599.638
## 15       region_L_AmericaYes  0.54534625 0.20464557  2.66483289 1600.416
## 16            region_AsiaYes -0.12031202 0.16603890 -0.72460140 1600.312
## 17          region_EuropeYes -0.17056651 0.17294261 -0.98626078 1600.425
## 18         region_OceaniaYes  0.58358419 0.31842434  1.83272484 1600.589
## 19            multicentreYes  0.18776497 0.13095073  1.43385971 1600.816
## 20 primary_purposePrevention  0.51969705 0.24466619  2.12410657 1600.671
## 21  primary_purposeTreatment  0.40616298 0.22209853  1.82875128 1600.652
## 22  sponsor_typeInvestigator -0.02396366 0.27477845 -0.08721083 1586.386
## 23  sponsor_typeNon industry -0.59022606 0.15161707 -3.89287341 1582.725
## 24               sample_size  0.31587109 0.04842993  6.52222955 1597.516
##         p.value
## 1  2.849435e-04
## 2  3.330669e-15
## 3  5.039155e-03
## 4  9.206295e-01
## 5  4.621053e-01
## 6  3.971802e-04
## 7  6.252654e-01
## 8  9.425946e-01
## 9  1.920804e-02
## 10 6.008603e-01
## 11 4.223311e-02
## 12 8.225071e-01
## 13 1.212003e-01
## 14 1.479317e-01
## 15 7.780068e-03
## 16 4.688025e-01
## 17 3.241542e-01
## 18 6.702917e-02
## 19 1.518076e-01
## 20 3.381418e-02
## 21 6.762287e-02
## 22 9.305149e-01
## 23 1.031798e-04
## 24 9.269185e-11
## 
## $prospective
##                         term    estimate  std.error   statistic       df
## 1                (Intercept) -0.75517256 0.42772090 -1.76557320 1593.218
## 2                  covidTRUE  0.06653992 0.12718724  0.52316505 1599.975
## 3      source_registryCT.gov  0.15562652 0.25960741  0.59946872 1600.019
## 4        source_registryCTRI  2.29377635 0.45895856  4.99778528 1600.153
## 5       source_registryEUCTR  0.25425402 0.33623656  0.75617602 1600.343
## 6        source_registryIRCT -0.83187679 0.28893317 -2.87913219 1595.844
## 7        source_registryJPRN  0.07333326 0.41435204  0.17698299 1599.649
## 8       source_registryOther  0.03287726 0.34924311  0.09413861 1600.272
## 9         phase_cleanPhase 2  0.33163662 0.21392262  1.55026434 1593.456
## 10        phase_cleanPhase 3  0.07682503 0.22936019  0.33495363 1572.113
## 11        phase_cleanPhase 4 -0.03802994 0.23166063 -0.16416227 1596.806
## 12      phase_cleanUndefined -0.17161955 0.24408737 -0.70310703 1587.019
## 13          region_AfricaYes  0.54178023 0.26407030  2.05165150 1600.807
## 14       region_N_AmericaYes  0.65450271 0.17793035  3.67842093 1600.748
## 15       region_L_AmericaYes -0.35420678 0.21123628 -1.67682738 1600.746
## 16            region_AsiaYes  0.01217093 0.17815220  0.06831761 1600.481
## 17          region_EuropeYes  0.23756270 0.19050099  1.24704184 1600.532
## 18         region_OceaniaYes  1.25152681 0.40255741  3.10893995 1600.789
## 19            multicentreYes  0.46159292 0.13647021  3.38237137 1600.695
## 20 primary_purposePrevention -0.28986281 0.24512881 -1.18249185 1600.208
## 21  primary_purposeTreatment  0.12275457 0.22142511  0.55438412 1600.334
## 22  sponsor_typeInvestigator  0.27514737 0.30685720  0.89666261 1594.032
## 23  sponsor_typeNon industry  0.11781716 0.15833678  0.74409217 1592.379
## 24               sample_size  0.14095973 0.04889687  2.88279635 1572.067
##         p.value
## 1  7.765900e-02
## 2  6.009318e-01
## 3  5.489452e-01
## 4  6.435482e-07
## 5  4.496550e-01
## 6  4.040868e-03
## 7  8.595442e-01
## 8  9.250108e-01
## 9  1.212767e-01
## 10 7.377047e-01
## 11 8.696242e-01
## 12 4.820922e-01
## 13 4.036593e-02
## 14 2.424459e-04
## 15 9.377149e-02
## 16 9.455413e-01
## 17 2.125646e-01
## 18 1.910668e-03
## 19 7.359496e-04
## 20 2.371863e-01
## 21 5.793934e-01
## 22 3.700344e-01
## 23 4.569305e-01
## 24 3.995209e-03
# Pooled coefficient tables, one per outcome.
sum_indication_direct_mice <- lapply(
  indication_direct_models_mice,
  function(model) summary(model$pooled_fit)
)

# OR and 95% CI (Bonferroni corrected)
# Row 2 of each pooled table is the covidTRUE term; columns 2, 3 and 6 are
# estimate, std.error and p.value. The estimate and its interval bounds
# (estimate -/+ z * std.error) are exponentiated; the p-value is appended as is.
pool_OR_indication_direct_mice <- lapply(sum_indication_direct_mice, function(pooled) {
  covid_term <- pooled[2, ]
  cbind(
    exp(cbind(
      covid_term[, 2],
      covid_term[, 2] - z * covid_term[, 3],
      covid_term[, 2] + z * covid_term[, 3]
    )),
    covid_term[, 6]
  )
})

# Stack the per-outcome rows into one data frame and tag the analysis.
pool_OR_indication_direct_mice <- do.call(
  rbind.data.frame,
  pool_OR_indication_direct_mice
)
pool_OR_indication_direct_mice$Analysis <- "IM direct (5)"

# sjPlot::tab_model(indication_direct_models_mice)
# sjPlot::plot_models(indication_direct_models_mice, prefix.labels = "varname") + theme_bw()

fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data

# Density plots from the imputation object, one call per imputed variable:
# phase, sponsor type and sample size.
mice::densityplot(indication_mice, ~ phase_clean)

mice::densityplot(indication_mice, ~ sponsor_type)

mice::densityplot(indication_mice, ~ sample_size)

7.5.2.2.2 Compare complete case analysis with MICE analysis for each outcome
7.5.2.2.2.1 Control arm
# Complete-case logistic regression of control arm on the direct-effect
# adjustment set (rows with missing values are dropped by glm).
summary(
  glm(
    as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
## 
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.8768   0.1994   0.3271   0.4872   2.0094  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.40142    0.68659  -0.585 0.558774    
## covidTRUE                 -0.78471    0.20291  -3.867 0.000110 ***
## source_registryCT.gov     -1.30534    0.49569  -2.633 0.008455 ** 
## source_registryCTRI       -0.98018    0.57007  -1.719 0.085540 .  
## source_registryEUCTR      -2.23270    0.58280  -3.831 0.000128 ***
## source_registryIRCT       -0.57126    0.56479  -1.011 0.311803    
## source_registryJPRN       -2.97240    0.65426  -4.543 5.54e-06 ***
## source_registryOther      -1.32440    0.62151  -2.131 0.033095 *  
## phase_cleanPhase 2         0.87639    0.27466   3.191 0.001419 ** 
## phase_cleanPhase 3         0.86491    0.32103   2.694 0.007056 ** 
## phase_cleanPhase 4         0.15861    0.30894   0.513 0.607674    
## phase_cleanUndefined      -0.07839    0.31609  -0.248 0.804143    
## region_AfricaYes           0.61576    0.49438   1.246 0.212940    
## region_N_AmericaYes       -0.22326    0.25664  -0.870 0.384330    
## region_L_AmericaYes        0.06006    0.34796   0.173 0.862954    
## region_AsiaYes            -0.14831    0.26834  -0.553 0.580480    
## region_EuropeYes          -0.07272    0.28773  -0.253 0.800469    
## region_OceaniaYes          0.22712    0.51379   0.442 0.658453    
## multicentreYes             0.15636    0.21642   0.722 0.469994    
## primary_purposePrevention  0.28722    0.31873   0.901 0.367510    
## primary_purposeTreatment   0.85701    0.28053   3.055 0.002251 ** 
## sponsor_typeInvestigator   0.05360    0.40996   0.131 0.895986    
## sponsor_typeNon industry   0.15374    0.22947   0.670 0.502866    
## sample_size                0.68906    0.08597   8.015 1.10e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1144.33  on 1609  degrees of freedom
## Residual deviance:  946.04  on 1586  degrees of freedom
##   (47 observations deleted due to missingness)
## AIC: 994.04
## 
## Number of Fisher Scoring iterations: 6
# Same control-arm model fitted on each imputed dataset, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("control_arm", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates table, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
7.5.2.2.2.2 Randomisation
# Complete-case logistic regression of randomisation on the direct-effect
# adjustment set (rows with missing values are dropped by glm).
summary(
  glm(
    as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
## 
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4006   0.2319   0.4020   0.5920   2.2030  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -1.163344   0.560294  -2.076 0.037865 *  
## covidTRUE                 -0.721661   0.172022  -4.195 2.73e-05 ***
## source_registryCT.gov     -0.631524   0.363234  -1.739 0.082102 .  
## source_registryCTRI       -0.239598   0.444577  -0.539 0.589932    
## source_registryEUCTR      -0.995336   0.455771  -2.184 0.028973 *  
## source_registryIRCT       -0.299489   0.405439  -0.739 0.460102    
## source_registryJPRN       -2.135571   0.541098  -3.947 7.92e-05 ***
## source_registryOther      -0.414378   0.501303  -0.827 0.408463    
## phase_cleanPhase 2         0.810087   0.240263   3.372 0.000747 ***
## phase_cleanPhase 3         0.828245   0.278058   2.979 0.002895 ** 
## phase_cleanPhase 4         0.075342   0.265730   0.284 0.776771    
## phase_cleanUndefined      -0.030325   0.277135  -0.109 0.912866    
## region_AfricaYes           0.799657   0.448957   1.781 0.074889 .  
## region_N_AmericaYes       -0.536686   0.227304  -2.361 0.018221 *  
## region_L_AmericaYes        0.120126   0.311840   0.385 0.700077    
## region_AsiaYes            -0.142274   0.240638  -0.591 0.554361    
## region_EuropeYes          -0.240252   0.252141  -0.953 0.340666    
## region_OceaniaYes          0.198177   0.455835   0.435 0.663741    
## multicentreYes             0.345967   0.184246   1.878 0.060417 .  
## primary_purposePrevention  0.081070   0.273621   0.296 0.767012    
## primary_purposeTreatment   0.965373   0.245692   3.929 8.52e-05 ***
## sponsor_typeInvestigator  -0.002579   0.368211  -0.007 0.994411    
## sponsor_typeNon industry  -0.041474   0.201824  -0.205 0.837186    
## sample_size                0.614657   0.073154   8.402  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1459.4  on 1609  degrees of freedom
## Residual deviance: 1211.8  on 1586  degrees of freedom
##   (47 observations deleted due to missingness)
## AIC: 1259.8
## 
## Number of Fisher Scoring iterations: 5
# Same randomisation model fitted on each imputed dataset, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("randomisation", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates table, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
7.5.2.2.2.3 Blinding
# Complete-case logistic regression of blinding on the direct-effect
# adjustment set (rows with missing values are dropped by glm).
summary(
  glm(
    as.formula(paste("blinding", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
## 
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(indication_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2384  -1.0519   0.5217   0.9879   2.1342  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -1.55925    0.43688  -3.569 0.000358 ***
## covidTRUE                 -0.96597    0.12477  -7.742 9.76e-15 ***
## source_registryCT.gov      0.79869    0.28583   2.794 0.005202 ** 
## source_registryCTRI        0.05285    0.34712   0.152 0.878997    
## source_registryEUCTR       0.26120    0.34740   0.752 0.452128    
## source_registryIRCT        1.10526    0.31118   3.552 0.000383 ***
## source_registryJPRN        0.31122    0.47865   0.650 0.515565    
## source_registryOther       0.04078    0.36688   0.111 0.911494    
## phase_cleanPhase 2         0.49032    0.20826   2.354 0.018554 *  
## phase_cleanPhase 3         0.11831    0.22348   0.529 0.596527    
## phase_cleanPhase 4        -0.46494    0.23531  -1.976 0.048172 *  
## phase_cleanUndefined      -0.05946    0.24425  -0.243 0.807653    
## region_AfricaYes           0.35186    0.23350   1.507 0.131827    
## region_N_AmericaYes       -0.24667    0.15829  -1.558 0.119154    
## region_L_AmericaYes        0.55333    0.20587   2.688 0.007193 ** 
## region_AsiaYes            -0.13132    0.16598  -0.791 0.428836    
## region_EuropeYes          -0.18028    0.17365  -1.038 0.299178    
## region_OceaniaYes          0.53145    0.31786   1.672 0.094533 .  
## multicentreYes             0.22378    0.13191   1.697 0.089791 .  
## primary_purposePrevention  0.51475    0.24562   2.096 0.036108 *  
## primary_purposeTreatment   0.38330    0.22270   1.721 0.085217 .  
## sponsor_typeInvestigator  -0.01946    0.27477  -0.071 0.943552    
## sponsor_typeNon industry  -0.58791    0.15207  -3.866 0.000111 ***
## sample_size                0.31023    0.04849   6.397 1.58e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2222.1  on 1609  degrees of freedom
## Residual deviance: 1936.2  on 1586  degrees of freedom
##   (47 observations deleted due to missingness)
## AIC: 1984.2
## 
## Number of Fisher Scoring iterations: 4
# Same blinding model fitted on each imputed dataset, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("blinding", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates table, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)
7.5.2.2.2.4 Prospective registration
# Complete-case logistic regression of prospective registration on the
# direct-effect adjustment set (rows with missing values are dropped by glm).
summary(
  glm(
    as.formula(paste("prospective", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
)
## 
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(indication_direct_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4501  -1.0823   0.6158   0.8673   1.6632  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.78420    0.42822  -1.831 0.067054 .  
## covidTRUE                  0.01711    0.12844   0.133 0.894015    
## source_registryCT.gov      0.15752    0.25972   0.606 0.544191    
## source_registryCTRI        2.28225    0.45985   4.963 6.94e-07 ***
## source_registryEUCTR       0.29496    0.33695   0.875 0.381364    
## source_registryIRCT       -0.83231    0.28898  -2.880 0.003975 ** 
## source_registryJPRN       -0.16557    0.45385  -0.365 0.715246    
## source_registryOther      -0.05469    0.35306  -0.155 0.876896    
## phase_cleanPhase 2         0.31075    0.21410   1.451 0.146653    
## phase_cleanPhase 3         0.07361    0.22925   0.321 0.748161    
## phase_cleanPhase 4        -0.06518    0.23208  -0.281 0.778819    
## phase_cleanUndefined      -0.18603    0.24509  -0.759 0.447841    
## region_AfricaYes           0.53065    0.26437   2.007 0.044724 *  
## region_N_AmericaYes        0.64307    0.17803   3.612 0.000304 ***
## region_L_AmericaYes       -0.34954    0.21266  -1.644 0.100246    
## region_AsiaYes            -0.00513    0.17851  -0.029 0.977073    
## region_EuropeYes           0.19764    0.19096   1.035 0.300689    
## region_OceaniaYes          1.28801    0.40544   3.177 0.001489 ** 
## multicentreYes             0.45061    0.13767   3.273 0.001064 ** 
## primary_purposePrevention -0.28508    0.24671  -1.156 0.247860    
## primary_purposeTreatment   0.17350    0.22303   0.778 0.436608    
## sponsor_typeInvestigator   0.27612    0.30738   0.898 0.369017    
## sponsor_typeNon industry   0.11510    0.15904   0.724 0.469269    
## sample_size                0.15266    0.04908   3.111 0.001867 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2033.0  on 1609  degrees of freedom
## Residual deviance: 1801.7  on 1586  degrees of freedom
##   (47 observations deleted due to missingness)
## AIC: 1849.7
## 
## Number of Fisher Scoring iterations: 5
# Same prospective-registration model fitted on each imputed dataset, then pooled.
fit <- with(
  indication_mice,
  glm(
    as.formula(paste("prospective", "~", paste(indication_direct_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates table, followed by the coefficient summary.
pooled_fit$pooled
summary(pooled_fit)

7.6 Total effect (analyses 6 and 7)

We repeat the main analysis adjusting only for confounding variables.

7.6.1 Complete case analysis

7.6.1.1 Main dataset

# Adjustment set for the total-effect analyses; the indication-matched
# dataset uses the same set as the main dataset.
main_total_adjustment <- c("covid", "sponsor_type")
indication_total_adjustment <- main_total_adjustment

# One logistic regression per outcome in `myvars`, fitted on the main dataset
# and adjusted only for the variables in `main_total_adjustment`.
main_total_models <- lapply(myvars, function(outcome) {
  rhs <- paste(main_total_adjustment, collapse = "+")
  glm(
    as.formula(paste(outcome, "~", rhs)),
    family = binomial(link = "logit"),
    data = main_dataset
  )
})

names(main_total_models) <- paste0(myvars, "_(total)")
# lapply(main_total_models, summary)

# Side-by-side regression table for all outcomes.
sjPlot::tab_model(main_total_models)
  Control arm randomisation Blinding Prospective registration
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 2.88 2.31 – 3.62 <0.001 2.24 1.82 – 2.77 <0.001 1.18 0.98 – 1.43 0.085 4.94 3.90 – 6.33 <0.001
covidTRUE 2.19 1.68 – 2.88 <0.001 1.93 1.52 – 2.45 <0.001 0.98 0.80 – 1.20 0.828 0.79 0.63 – 0.99 0.040
Sponsor type:
Investigator
0.82 0.48 – 1.43 0.460 0.88 0.54 – 1.48 0.620 0.82 0.53 – 1.28 0.385 1.22 0.69 – 2.29 0.515
Sponsor type: Non
industry
1.18 0.90 – 1.56 0.233 1.10 0.85 – 1.41 0.461 0.63 0.50 – 0.78 <0.001 0.44 0.33 – 0.57 <0.001
Observations 1608 1608 1608 1608
R2 Tjur 0.025 0.021 0.012 0.036
# Coefficient plot of the main-dataset total-effect models, with a
# reference line at 1.
sjPlot::plot_models(main_total_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

# library(sjmisc)

7.6.1.2 Indication-matched dataset

We repeat the same analyses on the indication-matched dataset.

# One logistic regression per outcome in `myvars`, fitted on the
# indication-matched dataset and adjusted only for the variables in
# `indication_total_adjustment`.
indication_total_models <- lapply(myvars, function(outcome) {
  rhs <- paste(indication_total_adjustment, collapse = "+")
  glm(
    as.formula(paste(outcome, "~", rhs)),
    family = binomial(link = "logit"),
    data = indication_dataset
  )
})

names(indication_total_models) <- paste0(myvars, "_(total_indication)")
# lapply(indication_total_models, summary)

# lapply(indication_total_models, function(x) knitr::kable(exp(confint(x)), digits = 2))
# lapply(indication_total_models, function(x) print(sjPlot::tab_model(x)))

# Side-by-side regression table for all outcomes.
sjPlot::tab_model(indication_total_models)
  Control arm randomisation Blinding Prospective registration
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 8.31 6.08 – 11.62 <0.001 6.00 4.54 – 8.04 <0.001 3.05 2.44 – 3.83 <0.001 2.72 2.17 – 3.43 <0.001
covidTRUE 0.92 0.68 – 1.25 0.600 0.97 0.75 – 1.25 0.819 0.53 0.43 – 0.65 <0.001 1.09 0.89 – 1.34 0.413
Sponsor type:
Investigator
0.79 0.41 – 1.62 0.495 0.82 0.45 – 1.56 0.517 0.65 0.41 – 1.06 0.080 1.25 0.73 – 2.22 0.430
Sponsor type: Non
industry
0.90 0.63 – 1.26 0.542 0.73 0.53 – 0.99 0.043 0.40 0.31 – 0.50 <0.001 0.62 0.49 – 0.79 <0.001
Observations 1639 1639 1639 1639
R2 Tjur 0.001 0.003 0.065 0.012
# Plot the estimates of all outcome models together, with a reference
# line at an odds ratio of 1.
sjPlot::plot_models(indication_total_models, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

7.6.2 Multiple imputation

Phase, sample size (for 3 trials) and sponsor type have missing values. Here only sponsor type is to be imputed.

7.6.2.1 Main dataset

Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.

set.seed(5)

# Dry run (maxit = 0): only used to obtain mice's default predictor matrix.
mice_in <- mice::mice(main_for_mice, maxit = 0, printFlag = FALSE)
# mice_in <- mice::mice(main_dataset, maxit = 0, print = FALSE)
predictor_matrix <- mice_in$predictorMatrix

# warning message: Number of logged events: 1
# Pairwise correlations between the outcomes and the adjustment variables;
# every column is passed through as.numeric() first.
correlation_matrix <- round(
  cor(
    sapply(main_for_mice[, c(myvars, main_total_adjustment)], as.numeric),
    use = "pairwise.complete.obs"
  ),
  2
)
# correlation_matrix <- round(cor(na.omit(values)), 2)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# Reverse the level order of Var2 so the y axis runs opposite to the x axis.
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)

# labels_plot <- levels(melted_correlation_matrix[,1])
# labels_plot <- labels[levels(melted_correlation_matrix[,1]),]$short
# Axis labels: variable names with underscores stripped, named by the
# original level so the labels stay attached to the right variable.
labels_plot <- vapply(
  levels(melted_correlation_matrix[, 1]),
  function(x) gsub("_", "", x),
  character(1)
)

# Heatmap of the upper-triangle correlations.
ggplot(data = melted_correlation_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4", high = "green3", mid = "white", midpoint = 0,
    na.value = "white", limits = c(-1, 1), name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    # panel.background = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1, title.position = "top", title.hjust = 0.5))

# quickpred: quick selection procedure of predictors.
# Keep only predictors whose correlation with the target is at least 0.25,
# run mice with that matrix and show the predictor matrix that was used.
mice_in2 <- mice::mice(
  main_for_mice,
  predictorMatrix = mice::quickpred(main_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix

Modify the predictor matrix such that only sponsor type is imputed, using all variables.

# Mark `covid` as a predictor everywhere; its own row needs no special
# handling because every row except `sponsor_type` is zeroed right after.
predictor_matrix[, "covid"] <- 1
# Only the `sponsor_type` row keeps its predictors.
predictor_matrix[rownames(predictor_matrix) != "sponsor_type", ] <- 0

Generate 10 imputed datasets using chained equations (using package mice).

# Ten imputed datasets using the customised predictor matrix.
main_mice <- mice::mice(
  main_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Show the component names and class of the returned object.
attributes(main_mice)
## $names
##  [1] "data"            "imp"             "m"               "where"          
##  [5] "blocks"          "call"            "nmis"            "method"         
##  [9] "predictorMatrix" "visitSequence"   "formulas"        "post"           
## [13] "blots"           "ignore"          "seed"            "iteration"      
## [17] "lastSeedValue"   "chainMean"       "chainVar"        "loggedEvents"   
## [21] "version"         "date"           
## 
## $class
## [1] "mids"

Original data:

# First rows of the original (incomplete) data stored in the mids object.
head(main_mice$data)

Imputed datasets:

# First rows of the imputed values for each variable
# (one column per imputed dataset).
lapply(main_mice$imp, head)
## $control_arm
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $randomisation
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $blinding
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $prospective
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $source_registry
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $phase_clean
##           1         2         3         4         5       6       7       8
## 758 Phase 3   Phase 3   Phase 1   Phase 3 Undefined Phase 1 Phase 2 Phase 3
## 775 Phase 2   Phase 4   Phase 4 Undefined   Phase 2 Phase 1 Phase 2 Phase 1
## 790 Phase 3   Phase 2 Undefined   Phase 2   Phase 2 Phase 2 Phase 3 Phase 4
## 800 Phase 2 Undefined   Phase 4 Undefined   Phase 2 Phase 2 Phase 2 Phase 3
## 802 Phase 2 Undefined   Phase 3   Phase 2   Phase 3 Phase 3 Phase 4 Phase 3
## 809 Phase 4   Phase 1   Phase 2   Phase 2   Phase 2 Phase 2 Phase 3 Phase 3
##           9        10
## 758 Phase 4   Phase 4
## 775 Phase 4   Phase 2
## 790 Phase 2   Phase 3
## 800 Phase 3 Undefined
## 802 Phase 1   Phase 3
## 809 Phase 2   Phase 3
## 
## $region_Africa
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_N_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_L_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Asia
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Europe
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Oceania
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $multicentre
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $primary_purpose
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $sponsor_type
##                1            2            3            4            5
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Non industry Investigator Non industry Investigator Non industry
## 227     Industry     Industry     Industry Non industry     Industry
## 272 Non industry Non industry Non industry Investigator Non industry
## 765     Industry Non industry Non industry     Industry Non industry
## 766 Non industry Non industry Non industry Non industry Non industry
##                6            7            8            9           10
## 133 Non industry Non industry Non industry Non industry Non industry
## 145 Investigator Non industry Non industry     Industry Non industry
## 227     Industry Non industry     Industry Non industry     Industry
## 272 Non industry Non industry Non industry Non industry Non industry
## 765 Investigator     Industry     Industry Non industry     Industry
## 766 Non industry Non industry Non industry Non industry Non industry
## 
## $sample_size
##             1        2        3        4        5        6        7        8
## 1646 2.995732 6.366470 6.620073 6.492240 3.761200  4.65396 2.708050 3.465736
## 1648 4.094345 5.703782 5.247024 3.401197 3.465736 10.15813 3.401197 4.394449
##             9       10
## 1646 3.401197 4.248495
## 1648 3.401197 4.094345
## 
## $vaccine
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $conventional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $traditional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $subject_blind
##      1   2   3   4   5   6   7   8   9  10
## 1   No Yes Yes  No  No  No  No  No  No Yes
## 2  Yes Yes  No  No Yes  No  No Yes Yes  No
## 3  Yes Yes Yes  No  No Yes  No  No Yes  No
## 6  Yes Yes  No Yes Yes  No Yes  No  No  No
## 8  Yes Yes Yes  No  No Yes Yes  No  No Yes
## 10  No  No Yes  No  No  No  No  No Yes  No
## 
## $caregiver_blind
##     1   2   3   4   5  6   7   8   9  10
## 1  No  No Yes  No Yes No  No Yes Yes  No
## 2  No  No  No  No  No No  No Yes  No  No
## 3  No Yes  No  No  No No Yes Yes  No  No
## 6  No Yes  No  No  No No  No  No  No  No
## 8  No  No  No Yes  No No  No  No  No  No
## 10 No  No  No  No  No No Yes  No  No Yes
## 
## $investigator_blind
##      1   2   3  4   5   6   7   8  9  10
## 1   No  No Yes No  No Yes  No  No No  No
## 2   No Yes  No No  No Yes Yes Yes No Yes
## 3  Yes  No  No No  No  No  No  No No  No
## 6   No  No  No No  No  No  No  No No Yes
## 8  Yes  No Yes No Yes  No  No  No No  No
## 10  No  No  No No  No  No  No  No No  No
## 
## $outcome_blind
##      1   2   3  4   5   6   7   8   9  10
## 1   No Yes  No No  No Yes Yes  No  No  No
## 2   No  No Yes No  No  No  No  No  No  No
## 3   No  No  No No  No  No  No  No  No Yes
## 6  Yes  No  No No  No  No  No  No Yes  No
## 8   No  No  No No Yes Yes  No Yes  No  No
## 10  No  No  No No  No  No Yes  No  No  No
## 
## $analyst_blind
##     1  2  3  4  5  6  7  8  9 10
## 1  No No No No No No No No No No
## 2  No No No No No No No No No No
## 3  No No No No No No No No No No
## 6  No No No No No No No No No No
## 8  No No No No No No No No No No
## 10 No No No No No No No No No No
## 
## $covid
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $start_year
##       1    2    3    4    5    6    7    8    9   10
## 32 2020 2019 2020 2020 2018 2020 2020 2019 2019 2020
## 34 2020 2019 2020 2019 2020 2019 2020 2019 2019 2019
## 35 2020 2020 2019 2020 2018 2020 2020 2020 2019 2020
## 48 2020 2018 2020 2019 2018 2020 2020 2020 2020 2020
## 60 2020 2019 2020 2019 2018 2019 2020 2020 2019 2019
## 66 2020 2020 2020 2020 2020 2019 2019 2020 2019 2019
# In order to get the third imputed data set, use the complete() function

# c3 <- complete(imp, 3) 
# md.pattern(c3)

# c.long <- complete(imp, "long") # "broad"

Only impute sponsor type.

# Start from the imputation methods of the previous run and blank out every
# entry except sponsor_type; an empty method ("") makes mice skip that
# variable. The entry is selected by name rather than by position (it used
# to be hard-coded as index 15), so this stays correct if columns are added,
# removed or reordered.
method_vector <- main_mice$method
method_vector[names(method_vector) != "sponsor_type"] <- ""

# methods(mice)

Generate 10 imputed datasets using the updated method vector.

# Re-run the imputation with the restricted method vector.
main_mice <- mice::mice(
  main_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Trace lines of the imputation chains across iterations.
plot(main_mice)

Check that there is no trend with further iterations and that the lines mix.

# Continue the existing chains for 40 more iterations and re-inspect
# the trace lines.
main_mice_40 <- mice::mice.mids(
  main_mice,
  maxit = 40,
  printFlag = FALSE
)
plot(main_mice_40)

Plot of observed (blue) and imputed (red) phase:

# Phase by imputation number.
mice::stripplot(
  main_mice,
  data = phase_clean ~ .imp,
  pch = 20,
  cex = 2
)

# Under MCAR, univariate distributions of the observed and imputed data are expected to be identical. Under MAR, they can be different, both in location and spread, but their multivariate distribution is assumed to be identical.
# Default strip plot of the imputation object.
mice::stripplot(main_mice)

7.6.2.1.1 Analysis

Logistic regression analysis on the multiply imputed data.

# For every outcome: fit the logistic model on each imputed dataset and pool
# the fits. as.formula() is deliberately evaluated inside with(), so the
# formula's environment is the completed dataset; only the formula text is
# built beforehand.
main_total_models_mice <- lapply(myvars, function(outcome) {
  formula_text <- paste(outcome, "~", paste(main_total_adjustment, collapse = "+"))
  imputed_fits <- with(
    main_mice,
    glm(as.formula(formula_text), family = binomial(link = "logit"))
  )
  list("fit" = imputed_fits, "pooled_fit" = mice::pool(imputed_fits))
})

names(main_total_models_mice) <- myvars
# Pooled coefficient tables, one per outcome.
lapply(main_total_models_mice, function(model) summary(model[["pooled_fit"]]))
## $control_arm
##                       term   estimate std.error statistic        df
## 1              (Intercept)  1.0313801 0.1130089  9.126537 1555.5636
## 2                covidTRUE  0.7899260 0.1351990  5.842692 1631.8535
## 3 sponsor_typeInvestigator -0.2952479 0.2602657 -1.134409  649.2243
## 4 sponsor_typeNon industry  0.1931466 0.1401995  1.377655 1580.7096
##        p.value
## 1 0.000000e+00
## 2 6.188265e-09
## 3 2.570415e-01
## 4 1.685048e-01
## 
## $randomisation
##                       term   estimate std.error  statistic        df
## 1              (Intercept)  0.7908084 0.1058102  7.4738425 1578.3533
## 2                covidTRUE  0.6572158 0.1207774  5.4415445 1632.1633
## 3 sponsor_typeInvestigator -0.1900314 0.2458600 -0.7729251  746.8693
## 4 sponsor_typeNon industry  0.1102592 0.1283830  0.8588302 1592.9243
##        p.value
## 1 1.283418e-13
## 2 6.085322e-08
## 3 4.398112e-01
## 4 3.905634e-01
## 
## $blinding
##                       term     estimate  std.error   statistic        df
## 1              (Intercept)  0.100787372 0.09528738  1.05772009 1576.1721
## 2                covidTRUE  0.008050086 0.10175125  0.07911535 1632.5333
## 3 sponsor_typeInvestigator -0.381094393 0.22112888 -1.72340398  751.1032
## 4 sponsor_typeNon industry -0.423466399 0.11116590 -3.80931905 1595.8302
##        p.value
## 1 0.2903451504
## 2 0.9369505710
## 3 0.0852270346
## 4 0.0001446336
## 
## $prospective
##                       term    estimate std.error  statistic        df
## 1              (Intercept)  1.48940017 0.1198595 12.4262132 1518.1648
## 2                covidTRUE -0.15762260 0.1126263 -1.3995181 1632.1952
## 3 sponsor_typeInvestigator -0.04690336 0.2785860 -0.1683622  482.1239
## 4 sponsor_typeNon industry -0.76373089 0.1325281 -5.7627829 1524.0708
##        p.value
## 1 0.000000e+00
## 2 1.618477e-01
## 3 8.663689e-01
## 4 9.994819e-09
# Pooled coefficient table for each outcome.
sum_main_total_mice <- lapply(
  main_total_models_mice,
  function(model) summary(model[["pooled_fit"]])
)

# OR and 95% CI (Bonferroni corrected)
# Row 2 of each pooled table is the covidTRUE term. The result is an unnamed
# 1 x 4 matrix: OR, lower limit, upper limit, p-value. `z` is the critical
# value defined earlier in the file.
pool_OR_main_total_mice <- lapply(sum_main_total_mice, function(pooled) {
  est <- pooled[2, "estimate"]
  se <- pooled[2, "std.error"]
  matrix(
    c(exp(c(est, est - z * se, est + z * se)), pooled[2, "p.value"]),
    nrow = 1
  )
})
# Stack the per-outcome rows, print them, then tag the analysis.
pool_OR_main_total_mice <- do.call(rbind.data.frame, pool_OR_main_total_mice)
pool_OR_main_total_mice
pool_OR_main_total_mice$Analysis <- "Main total (6)"

# sjPlot::tab_model(main_total_models_mice)
# sjPlot::plot_models(main_total_models_mice, prefix.labels = "varname") + theme_bw()

fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data

# Density plot of sponsor type from the imputation object.
mice::densityplot(
  main_mice,
  data = ~ sponsor_type
)

7.6.2.1.2 Compare complete case analysis with MICE analysis for each outcome
7.6.2.1.2.1 Control arm
# Complete-case logistic regression of control_arm on the adjustment set,
# fitted on main_dataset; rows with missing values are dropped by glm()
# (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("control_arm", "~", paste(main_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(main_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0671   0.5013   0.5423   0.7178   0.8425  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0564     0.1147   9.212  < 2e-16 ***
## covidTRUE                  0.7861     0.1369   5.741 9.41e-09 ***
## sponsor_typeInvestigator  -0.2031     0.2749  -0.739    0.460    
## sponsor_typeNon industry   0.1683     0.1411   1.192    0.233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1535.7  on 1607  degrees of freedom
## Residual deviance: 1495.0  on 1604  degrees of freedom
##   (54 observations deleted due to missingness)
## AIC: 1503
## 
## Number of Fisher Scoring iterations: 4
# Same model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("control_arm", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates with their variance components, then the summary table.
pooled_fit[["pooled"]]
summary(pooled_fit)
7.6.2.1.2.2 Randomisation
# Complete-case logistic regression of randomisation on the adjustment set,
# fitted on main_dataset; rows with missing values are dropped by glm()
# (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("randomisation", "~", paste(main_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(main_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8693   0.6189   0.6189   0.8259   0.9064  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.80510    0.10714   7.515 5.71e-14 ***
## covidTRUE                 0.65540    0.12202   5.371 7.83e-08 ***
## sponsor_typeInvestigator -0.12772    0.25781  -0.495    0.620    
## sponsor_typeNon industry  0.09512    0.12910   0.737    0.461    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1770.1  on 1607  degrees of freedom
## Residual deviance: 1737.0  on 1604  degrees of freedom
##   (54 observations deleted due to missingness)
## AIC: 1745
## 
## Number of Fisher Scoring iterations: 4
# Same model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("randomisation", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates with their variance components, then the summary table.
pooled_fit[["pooled"]]
summary(pooled_fit)
7.6.2.1.2.3 Blinding
# Complete-case logistic regression of blinding on the adjustment set,
# fitted on main_dataset; rows with missing values are dropped by glm()
# (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("blinding", "~", paste(main_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(main_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.249  -1.051  -1.042   1.309   1.319  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.16579    0.09632   1.721   0.0852 .  
## covidTRUE                -0.02227    0.10260  -0.217   0.8282    
## sponsor_typeInvestigator -0.19603    0.22560  -0.869   0.3849    
## sponsor_typeNon industry -0.46985    0.11165  -4.208 2.57e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2220.2  on 1607  degrees of freedom
## Residual deviance: 2201.0  on 1604  degrees of freedom
##   (54 observations deleted due to missingness)
## AIC: 2209
## 
## Number of Fisher Scoring iterations: 4
# Same model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("blinding", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates with their variance components, then the summary table.
pooled_fit[["pooled"]]
summary(pooled_fit)
7.6.2.1.2.4 Prospective registration
# Complete-case logistic regression of prospective on the adjustment set,
# fitted on main_dataset; rows with missing values are dropped by glm()
# (see "observations deleted due to missingness" in the output).
summary(glm(as.formula(paste("prospective", "~", paste(main_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = main_dataset))
## 
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(main_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = main_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9748  -1.4106   0.6174   0.8728   0.9608  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.5974     0.1238  12.901  < 2e-16 ***
## covidTRUE                 -0.2355     0.1147  -2.054    0.040 *  
## sponsor_typeInvestigator   0.1990     0.3054   0.651    0.515    
## sponsor_typeNon industry  -0.8285     0.1349  -6.141 8.18e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1917.7  on 1607  degrees of freedom
## Residual deviance: 1857.7  on 1604  degrees of freedom
##   (54 observations deleted due to missingness)
## AIC: 1865.7
## 
## Number of Fisher Scoring iterations: 4
# Same model fitted on each imputed dataset, then pooled.
fit <- with(
  main_mice,
  glm(
    as.formula(paste("prospective", "~", paste(main_total_adjustment, collapse = "+"))),
    family = binomial(link = "logit")
  )
)
pooled_fit <- mice::pool(fit)
# Pooled estimates with their variance components, then the summary table.
pooled_fit[["pooled"]]
summary(pooled_fit)

7.6.2.2 Indication-matched dataset

Use the mice command to generate an initial predictor matrix (indicating which column variables are used to predict the row variables) without making any imputations.

set.seed(5)

# Dry run (maxit = 0): only used to obtain mice's default predictor matrix.
mice_in <- mice::mice(indication_for_mice, maxit = 0, printFlag = FALSE)
# mice_in <- mice::mice(indication_dataset, maxit = 0, print = FALSE)
predictor_matrix <- mice_in$predictorMatrix

# warning message: Number of logged events: 1
# Pairwise correlations between the outcomes and the adjustment variables;
# every column is passed through as.numeric() first.
correlation_matrix <- round(
  cor(
    sapply(indication_for_mice[, c(myvars, indication_total_adjustment)], as.numeric),
    use = "pairwise.complete.obs"
  ),
  2
)
# correlation_matrix <- round(cor(na.omit(values)), 2)
# keep only upper triangle
correlation_matrix[lower.tri(correlation_matrix)] <- NA
melted_correlation_matrix <- reshape2::melt(correlation_matrix)
# Reverse the level order of Var2 so the y axis runs opposite to the x axis.
melted_correlation_matrix$Var2 <- factor(
  melted_correlation_matrix$Var2,
  levels = rev(levels(melted_correlation_matrix$Var2)),
  ordered = TRUE
)

# Axis labels: variable names with underscores stripped, named by the
# original level so the labels stay attached to the right variable.
labels_plot <- vapply(
  levels(melted_correlation_matrix[, 1]),
  function(x) gsub("_", "", x),
  character(1)
)

# Heatmap of the upper-triangle correlations.
ggplot(data = melted_correlation_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  xlab("") +
  ylab("") +
  scale_fill_gradient2(
    low = "darkorchid4", high = "green3", mid = "white", midpoint = 0,
    na.value = "white", limits = c(-1, 1), name = "Correlation"
  ) +
  scale_x_discrete(labels = labels_plot) +
  scale_y_discrete(labels = rev(labels_plot)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.7, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1, title.position = "top", title.hjust = 0.5))

# quickpred: quick selection procedure of predictors.
# Keep only predictors whose correlation with the target is at least 0.25,
# run mice with that matrix and show the predictor matrix that was used.
mice_in2 <- mice::mice(
  indication_for_mice,
  predictorMatrix = mice::quickpred(indication_for_mice, mincor = 0.25),
  printFlag = FALSE
)
mice_in2$predictorMatrix

Modify the predictor matrix such that only sponsor type is imputed, using all variables.

# Mark `covid` as a predictor everywhere; its own row needs no special
# handling because every row except `sponsor_type` is zeroed right after.
predictor_matrix[, "covid"] <- 1
# Only the `sponsor_type` row keeps its predictors.
predictor_matrix[rownames(predictor_matrix) != "sponsor_type", ] <- 0

Generate 10 imputed datasets using chained equations (using package mice).

# Ten imputed datasets using the customised predictor matrix.
indication_mice <- mice::mice(
  indication_for_mice,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Show the component names and class of the returned object.
attributes(indication_mice)
## $names
##  [1] "data"            "imp"             "m"               "where"          
##  [5] "blocks"          "call"            "nmis"            "method"         
##  [9] "predictorMatrix" "visitSequence"   "formulas"        "post"           
## [13] "blots"           "ignore"          "seed"            "iteration"      
## [17] "lastSeedValue"   "chainMean"       "chainVar"        "loggedEvents"   
## [21] "version"         "date"           
## 
## $class
## [1] "mids"

Original data:

# First rows of the original (incomplete) data stored in the mids object.
head(indication_mice$data)

Imputed datasets:

# First rows of the imputed values for each variable
# (one column per imputed dataset).
lapply(indication_mice$imp, head)
## $control_arm
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $randomisation
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $blinding
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $prospective
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $source_registry
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $phase_clean
##             1       2       3       4         5       6         7         8
## 233 Undefined Phase 4 Phase 2 Phase 2 Undefined Phase 1   Phase 4   Phase 2
## 234   Phase 4 Phase 1 Phase 2 Phase 4   Phase 4 Phase 2   Phase 2   Phase 3
## 235   Phase 4 Phase 3 Phase 1 Phase 4 Undefined Phase 3   Phase 2   Phase 3
## 663   Phase 1 Phase 4 Phase 2 Phase 3 Undefined Phase 2   Phase 3   Phase 2
## 666   Phase 3 Phase 3 Phase 2 Phase 2   Phase 3 Phase 1 Undefined Undefined
## 686   Phase 2 Phase 4 Phase 2 Phase 3   Phase 3 Phase 1 Undefined   Phase 3
##             9      10
## 233   Phase 4 Phase 4
## 234   Phase 3 Phase 3
## 235 Undefined Phase 1
## 663   Phase 3 Phase 1
## 666   Phase 3 Phase 3
## 686   Phase 3 Phase 3
## 
## $region_Africa
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_N_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_L_America
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Asia
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Europe
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $region_Oceania
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $multicentre
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $primary_purpose
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $sponsor_type
##                1            2            3            4            5
## 121 Non industry Non industry Non industry Non industry Non industry
## 179     Industry Non industry Investigator     Industry     Industry
## 224 Non industry Non industry Non industry Non industry Non industry
## 686     Industry Non industry Non industry     Industry Non industry
## 689 Non industry Non industry Non industry Non industry Non industry
## 690     Industry     Industry     Industry Non industry Non industry
##                6            7            8            9           10
## 121 Non industry Non industry Non industry Non industry Non industry
## 179 Non industry     Industry     Industry Non industry Investigator
## 224 Investigator Non industry Non industry Non industry Non industry
## 686     Industry     Industry Non industry Non industry Non industry
## 689     Industry     Industry Non industry Non industry     Industry
## 690     Industry Non industry Non industry     Industry Non industry
## 
## $sample_size
##             1        2        3        4        5        6        7        8
## 350  2.995732 7.852828 4.605170 4.382027 8.665613 3.688879 5.703782 5.075174
## 679  2.995732 4.094345 4.691348 4.276666 5.521461 5.937536 5.703782 6.063785
## 1640 2.995732 4.787492 5.808142 4.276666 6.135565 5.937536 4.521789 5.075174
## 1642 4.605170 4.787492 5.808142 5.598422 3.637586 4.094345 5.703782 5.075174
##             9       10
## 350  5.298317 3.401197
## 679  4.605170 2.302585
## 1640 2.995732 2.302585
## 1642 4.094345 2.302585
## 
## $vaccine
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $conventional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $traditional
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $subject_blind
##     1   2   3   4   5   6   7   8   9  10
## 1 Yes  No  No Yes Yes Yes Yes Yes Yes  No
## 3 Yes  No Yes  No  No Yes Yes  No Yes Yes
## 5 Yes  No Yes  No Yes  No  No  No Yes  No
## 6 Yes Yes Yes  No  No  No  No  No Yes  No
## 7  No Yes Yes Yes Yes  No  No Yes  No Yes
## 8 Yes Yes  No  No Yes  No  No Yes  No Yes
## 
## $caregiver_blind
##     1   2   3   4  5   6   7   8   9  10
## 1  No Yes Yes  No No Yes  No  No  No Yes
## 3 Yes  No  No Yes No Yes  No  No Yes  No
## 5  No  No  No  No No  No  No Yes Yes  No
## 6 Yes  No  No  No No Yes  No  No  No  No
## 7  No  No  No  No No  No  No  No Yes  No
## 8 Yes  No  No  No No Yes Yes Yes  No Yes
## 
## $investigator_blind
##     1   2  3   4   5   6   7   8   9  10
## 1  No Yes No  No  No  No  No  No  No  No
## 3 Yes  No No  No  No  No Yes Yes  No Yes
## 5 Yes  No No  No  No  No Yes Yes  No Yes
## 6 Yes  No No Yes  No Yes Yes Yes Yes Yes
## 7  No  No No  No Yes  No  No  No  No  No
## 8 Yes Yes No  No  No  No  No  No Yes  No
## 
## $outcome_blind
##     1   2   3   4   5   6   7   8   9  10
## 1 Yes  No  No Yes  No Yes  No  No  No  No
## 3  No  No  No Yes  No Yes Yes Yes Yes Yes
## 5  No  No  No  No Yes  No  No Yes  No  No
## 6 Yes  No  No  No  No  No  No  No  No  No
## 7  No Yes  No  No Yes  No  No  No  No  No
## 8  No Yes Yes  No  No  No Yes  No  No  No
## 
## $analyst_blind
##    1  2  3  4  5  6  7  8  9 10
## 1 No No No No No No No No No No
## 3 No No No No No No No No No No
## 5 No No No No No No No No No No
## 6 No No No No No No No No No No
## 7 No No No No No No No No No No
## 8 No No No No No No No No No No
## 
## $covid
##  [1] 1  2  3  4  5  6  7  8  9  10
## <0 rows> (or 0-length row.names)
## 
## $start_year
##       1    2    3    4    5    6    7    8    9   10
## 32 2019 2020 2020 2020 2020 2018 2019 2020 2018 2016
## 36 2017 2020 2020 2020 2017 2016 2019 2017 2013 2020
## 41 2017 2020 2019 2020 2019 2018 2017 2019 2020 2020
## 63 2019 2016 2020 2020 2019 2016 2019 2020 2018 2020
## 66 2017 2020 2020 2020 2017 2020 2018 2019 2020 2020
## 72 2017 2016 2020 2016 2020 2018 2020 2020 2013 2020
# In order to get the third imputed data set, use the complete() function

Only impute sponsor type.

# Start from the imputation methods of the previous run and blank out every
# entry except sponsor_type; an empty method ("") makes mice skip that
# variable. The entry is selected by name rather than by position (it used
# to be hard-coded as index 15), so this stays correct if columns are added,
# removed or reordered.
method_vector <- indication_mice$method
method_vector[names(method_vector) != "sponsor_type"] <- ""

# methods(mice)

Generate 10 imputed datasets using the updated method vector.

# Re-run the imputation with the restricted method vector.
indication_mice <- mice::mice(
  indication_for_mice,
  method = method_vector,
  m = 10,
  predictorMatrix = predictor_matrix,
  printFlag = FALSE
)
## Warning: Number of logged events: 50
# Trace lines of the imputation chains across iterations.
plot(indication_mice)

Check that there is no trend with further iterations and that the lines mix.

# Continue the existing chains for 40 more iterations and re-inspect
# the trace lines.
indication_mice_40 <- mice::mice.mids(
  indication_mice,
  maxit = 40,
  printFlag = FALSE
)
plot(indication_mice_40)

Plot of observed (blue) and imputed (red) phase:

# Phase by imputation number.
mice::stripplot(
  indication_mice,
  data = phase_clean ~ .imp,
  pch = 20,
  cex = 2
)

# Under MCAR, univariate distributions of the observed and imputed data are expected to be identical. Under MAR, they can be different, both in location and spread, but their multivariate distribution is assumed to be identical.
# Default strip plot of the imputation object.
mice::stripplot(indication_mice)

7.6.2.2.1 Analysis

Logistic regression analysis on the multiply imputed data.

# For every outcome: fit the logistic model on each imputed dataset and pool
# the fits. as.formula() is deliberately evaluated inside with(), so the
# formula's environment is the completed dataset; only the formula text is
# built beforehand.
indication_total_models_mice <- lapply(myvars, function(outcome) {
  formula_text <- paste(outcome, "~", paste(indication_total_adjustment, collapse = "+"))
  imputed_fits <- with(
    indication_mice,
    glm(as.formula(formula_text), family = binomial(link = "logit"))
  )
  list("fit" = imputed_fits, "pooled_fit" = mice::pool(imputed_fits))
})

names(indication_total_models_mice) <- myvars
# Pooled coefficient tables, one per outcome.
lapply(indication_total_models_mice, function(model) summary(model[["pooled_fit"]]))
## $control_arm
##                       term    estimate std.error  statistic       df   p.value
## 1              (Intercept)  2.09467435 0.1630852 12.8440478 1635.510 0.0000000
## 2                covidTRUE -0.09703931 0.1521018 -0.6379892 1638.837 0.5235698
## 3 sponsor_typeInvestigator -0.20713098 0.3467191 -0.5974028 1588.804 0.5503236
## 4 sponsor_typeNon industry -0.07485039 0.1756249 -0.4261946 1634.875 0.6700222
## 
## $randomisation
##                       term   estimate std.error  statistic       df    p.value
## 1              (Intercept)  1.7735724 0.1444615 12.2771272 1636.083 0.00000000
## 2                covidTRUE -0.0406002 0.1301694 -0.3119027 1638.837 0.75515413
## 3 sponsor_typeInvestigator -0.1828022 0.3151404 -0.5800660 1602.453 0.56195166
## 4 sponsor_typeNon industry -0.2912784 0.1545898 -1.8842020 1635.489 0.05971489
## 
## $blinding
##                       term   estimate std.error statistic       df      p.value
## 1              (Intercept)  1.0926804 0.1143869  9.552493 1634.362 0.000000e+00
## 2                covidTRUE -0.6517555 0.1023601 -6.367279 1638.777 2.492386e-10
## 3 sponsor_typeInvestigator -0.4030700 0.2430830 -1.658157 1625.728 9.747860e-02
## 4 sponsor_typeNon industry -0.8962649 0.1202718 -7.451997 1630.631 1.483258e-13
## 
## $prospective
##                       term    estimate std.error  statistic       df
## 1              (Intercept)  1.01030138 0.1162402  8.6914985 1638.120
## 2                covidTRUE  0.09736661 0.1064722  0.9144792 1638.837
## 3 sponsor_typeInvestigator  0.21287662 0.2817292  0.7556071 1637.035
## 4 sponsor_typeNon industry -0.48652144 0.1252574 -3.8841727 1637.490
##        p.value
## 1 0.0000000000
## 2 0.3605996495
## 3 0.4499934889
## 4 0.0001067652
# Pooled coefficient tables, one per outcome.
sum_indication_total_mice <- lapply(
  indication_total_models_mice,
  function(x) summary(x$pooled_fit)
)

# OR and 95% CI (Bonferroni corrected) for the covid term.
# The interval is estimate +/- z * std.error on the log-odds scale and is then
# exponentiated; `z` is defined earlier in the file (presumably the
# Bonferroni-corrected normal quantile -- confirm).
# The row and the columns are selected by name instead of by position, so a
# change in term order or in the column layout of the pooled summary stops with
# an error rather than silently reading the wrong cell.
pool_OR_indication_total_mice <- lapply(sum_indication_total_mice, function(x) {
  covid_row <- which(x$term == "covidTRUE")
  stopifnot(length(covid_row) == 1L)
  log_or <- x[covid_row, "estimate"]
  se <- x[covid_row, "std.error"]
  # Unnamed 1 x 4 matrix: OR, lower limit, upper limit, p-value.
  matrix(
    c(exp(c(log_or, log_or - z * se, log_or + z * se)), x[covid_row, "p.value"]),
    nrow = 1
  )
})

# One row per outcome, tagged with the analysis label used in the overall
# results table.
pool_OR_indication_total_mice <- do.call(rbind.data.frame, pool_OR_indication_total_mice)
pool_OR_indication_total_mice$Analysis <- "IM total (7)"

fmi: fraction of information about the coefficients missing due to nonresponse
lambda: proportion of the variation attributable to the missing data

# Density plot of sponsor_type for `indication_mice`.
mice::densityplot(indication_mice, ~ sponsor_type)

7.6.2.2.2 Compare complete case analysis with MICE analysis for each outcome
7.6.2.2.2.1 Control arm
# Complete-case logistic regression of control_arm on the
# indication_total_adjustment covariates, fitted to indication_dataset
# (glm drops the rows with missing values).
summary(glm(as.formula(paste("control_arm", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
## 
## Call:
## glm(formula = as.formula(paste("control_arm", "~", paste(indication_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1126   0.4766   0.5016   0.5207   0.5529  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.11802    0.16488  12.846   <2e-16 ***
## covidTRUE                -0.08003    0.15282  -0.524    0.600    
## sponsor_typeInvestigator -0.23704    0.34704  -0.683    0.495    
## sponsor_typeNon industry -0.10833    0.17774  -0.609    0.542    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1204.1  on 1638  degrees of freedom
## Residual deviance: 1203.1  on 1635  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 1211.1
## 
## Number of Fisher Scoring iterations: 4
# The same control_arm model fitted on each imputed dataset in indication_mice
# and pooled, for comparison with the complete-case fit above.
fit <- with(indication_mice, glm(as.formula(paste("control_arm", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit")))
pooled_fit <- mice::pool(fit)
# Pooling details per coefficient, followed by the pooled coefficient table.
pooled_fit$pooled
summary(pooled_fit)
7.6.2.2.2.2 Randomisation
# Complete-case logistic regression of randomisation on the
# indication_total_adjustment covariates, fitted to indication_dataset
# (glm drops the rows with missing values).
summary(glm(as.formula(paste("randomisation", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
## 
## Call:
## glm(formula = as.formula(paste("randomisation", "~", paste(indication_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9725   0.5554   0.6417   0.6505   0.6505  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               1.79103    0.14566  12.296   <2e-16 ***
## covidTRUE                -0.02994    0.13063  -0.229   0.8187    
## sponsor_typeInvestigator -0.20445    0.31530  -0.648   0.5167    
## sponsor_typeNon industry -0.31543    0.15604  -2.021   0.0432 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1523.7  on 1638  degrees of freedom
## Residual deviance: 1519.3  on 1635  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 1527.3
## 
## Number of Fisher Scoring iterations: 4
# The same randomisation model fitted on each imputed dataset in
# indication_mice and pooled, for comparison with the complete-case fit above.
fit <- with(indication_mice, glm(as.formula(paste("randomisation", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit")))
pooled_fit <- mice::pool(fit)
# Pooling details per coefficient, followed by the pooled coefficient table.
pooled_fit$pooled
summary(pooled_fit)
7.6.2.2.2.3 Blinding
# Complete-case logistic regression of blinding on the
# indication_total_adjustment covariates, fitted to indication_dataset
# (glm drops the rows with missing values).
summary(glm(as.formula(paste("blinding", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
## 
## Call:
## glm(formula = as.formula(paste("blinding", "~", paste(indication_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6722  -1.2006   0.7533   1.0985   1.3712  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.1144     0.1151   9.682  < 2e-16 ***
## covidTRUE                 -0.6333     0.1026  -6.171 6.77e-10 ***
## sponsor_typeInvestigator  -0.4266     0.2435  -1.752   0.0798 .  
## sponsor_typeNon industry  -0.9259     0.1211  -7.648 2.04e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2264.3  on 1638  degrees of freedom
## Residual deviance: 2155.6  on 1635  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 2163.6
## 
## Number of Fisher Scoring iterations: 4
# The same blinding model fitted on each imputed dataset in indication_mice
# and pooled, for comparison with the complete-case fit above.
fit <- with(indication_mice, glm(as.formula(paste("blinding", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit")))
pooled_fit <- mice::pool(fit)
# Pooling details per coefficient, followed by the pooled coefficient table.
pooled_fit$pooled
summary(pooled_fit)
7.6.2.2.2.4 Prospective registration
# Complete-case logistic regression of prospective on the
# indication_total_adjustment covariates, fitted to indication_dataset
# (glm drops the rows with missing values).
summary(glm(as.formula(paste("prospective", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset))
## 
## Call:
## glm(formula = as.formula(paste("prospective", "~", paste(indication_total_adjustment, 
##     collapse = "+"))), family = binomial(link = "logit"), data = indication_dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7604  -1.4081   0.7912   0.9298   0.9631  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               1.00097    0.11622   8.613  < 2e-16 ***
## covidTRUE                 0.08724    0.10658   0.819 0.413071    
## sponsor_typeInvestigator  0.22247    0.28183   0.789 0.429894    
## sponsor_typeNon industry -0.47339    0.12543  -3.774 0.000161 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2070.4  on 1638  degrees of freedom
## Residual deviance: 2050.2  on 1635  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 2058.2
## 
## Number of Fisher Scoring iterations: 4
# The same prospective model fitted on each imputed dataset in indication_mice
# and pooled, for comparison with the complete-case fit above.
fit <- with(indication_mice, glm(as.formula(paste("prospective", "~", paste(indication_total_adjustment, collapse = "+"))), family = binomial(link = "logit")))
pooled_fit <- mice::pool(fit)
# Pooling details per coefficient, followed by the pooled coefficient table.
pooled_fit$pooled
summary(pooled_fit)

7.7 E-value (analysis 8)

E-values are calculated below.

# E-values for four odds ratios: control arm and randomisation, each for the
# direct and the total effect in the main comparison.
# Arguments shared by all calls:
#   rare - 1 if the outcome is <15% at end of follow up, 0 otherwise
#   true - odds ratio for which we want to calculate the e-value (i.e. H0)
# NOTE(review): the estimates and interval limits are typed in by hand,
# presumably from the results of the earlier analyses -- re-check them whenever
# those analyses are re-run.

# control arm main direct
control_direct_e <- EValue::evalues.OR(
  est = 2.18, lo = 1.48, hi = 3.21,
  rare = 0, true = 1
)

# randomisation main direct
randomisation_direct_e <- EValue::evalues.OR(
  est = 1.9, lo = 1.34, hi = 2.69,
  rare = 0, true = 1
)

# control main total
control_total_e <- EValue::evalues.OR(
  est = 2.2, lo = 1.57, hi = 3.08,
  rare = 0, true = 1
)

# randomisation main total
randomisation_total_e <- EValue::evalues.OR(
  est = 1.93, lo = 1.42, hi = 2.6,
  rare = 0, true = 1
)

# Second row of every result stacked into one table, labelled by analysis,
# with the `upper` column dropped.
bind_rows(
  control_direct_e[2, ],
  randomisation_direct_e[2, ],
  control_total_e[2, ],
  randomisation_total_e[2, ]
) %>%
  rename(Point = point, Lower = lower) %>%
  mutate(
    Analysis = c(
      "Control arm direct effect",
      "Randomisation direct effect",
      "Control arm total effect",
      "Randomisation total effect"
    )
  ) %>%
  select(Analysis, everything(), -upper)

7.8 Geographic regions as confounders (analysis 9)

Analyses 6 and 7 are repeated with geographical regions included as covariates.

# Analysis 6 repeated with the region indicators added to the adjustment set.
# `locations` holds the names of the columns region_Africa:region_Oceania of
# main_dataset.
locations <- names(select(main_dataset, region_Africa:region_Oceania))
region_sens_adjustment <- c(main_total_adjustment, locations)

# One pooled logistic regression per outcome in `myvars`, fitted on every
# imputed dataset in `main_mice`; the list is named by outcome.
# NOTE(review): the formula is built inside the with() call; presumably that
# matters for where glm() looks up the model variables, so it is kept as is.
main_sens_models_mice <- setNames(
  lapply(myvars, function(x) {
    fit <- with(
      main_mice,
      glm(as.formula(paste(x, "~", paste(region_sens_adjustment, collapse = "+"))),
          family = binomial(link = "logit"))
    )
    list(fit = fit, pooled_fit = mice::pool(fit))
  }),
  myvars
)

# Print the pooled coefficient table of every outcome.
lapply(main_sens_models_mice, function(x) summary(x$pooled_fit))
## $control_arm
##                        term    estimate std.error  statistic        df
## 1               (Intercept)  0.92463459 0.2255080  4.1002295 1521.5804
## 2                 covidTRUE  0.80549532 0.1393394  5.7808154 1626.6395
## 3  sponsor_typeInvestigator -0.33981929 0.2770554 -1.2265392  562.7597
## 4  sponsor_typeNon industry  0.15503660 0.1637808  0.9466106 1489.5462
## 5          region_AfricaYes  1.04978281 0.3827758  2.7425524 1626.7237
## 6       region_N_AmericaYes -0.25923654 0.1710189 -1.5158359 1615.8124
## 7       region_L_AmericaYes  0.54001880 0.2497859  2.1619262 1626.2323
## 8            region_AsiaYes  0.22296493 0.1588778  1.4033740 1625.2715
## 9          region_EuropeYes -0.01444888 0.1613035 -0.0895757 1616.3551
## 10        region_OceaniaYes -0.02872399 0.2848034 -0.1008555 1626.2243
##         p.value
## 1  4.345463e-05
## 2  8.894384e-09
## 3  2.205088e-01
## 4  3.439908e-01
## 5  6.163213e-03
## 6  1.297564e-01
## 7  3.076932e-02
## 8  1.606964e-01
## 9  9.286355e-01
## 10 9.196776e-01
## 
## $randomisation
##                        term    estimate std.error   statistic       df
## 1               (Intercept)  0.71700211 0.2103646  3.40837758 1549.969
## 2                 covidTRUE  0.66224084 0.1247963  5.30657231 1626.803
## 3  sponsor_typeInvestigator -0.22181111 0.2613072 -0.84885190  655.925
## 4  sponsor_typeNon industry  0.06337223 0.1504714  0.42115794 1524.426
## 5          region_AfricaYes  0.76740848 0.3124267  2.45628309 1626.824
## 6       region_N_AmericaYes -0.37650700 0.1584059 -2.37684945 1618.033
## 7       region_L_AmericaYes  0.82777473 0.2392496  3.45987956 1626.464
## 8            region_AsiaYes  0.21476724 0.1467190  1.46379933 1625.267
## 9          region_EuropeYes -0.02558895 0.1486251 -0.17217112 1619.364
## 10        region_OceaniaYes -0.01308082 0.2671611 -0.04896231 1626.344
##         p.value
## 1  6.702483e-04
## 2  1.270782e-07
## 3  3.962734e-01
## 4  6.736991e-01
## 5  1.414211e-02
## 6  1.757693e-02
## 7  5.543722e-04
## 8  1.434422e-01
## 9  8.633245e-01
## 10 9.609553e-01
## 
## $blinding
##                        term    estimate std.error  statistic        df
## 1               (Intercept)  0.12718969 0.1783292  0.7132298 1564.9693
## 2                 covidTRUE -0.04401346 0.1056857 -0.4164560 1626.8380
## 3  sponsor_typeInvestigator -0.31023022 0.2323944 -1.3349299  755.6118
## 4  sponsor_typeNon industry -0.34618844 0.1299903 -2.6631868 1554.3899
## 5          region_AfricaYes  0.33955767 0.2165902  1.5677428 1626.8380
## 6       region_N_AmericaYes -0.01912501 0.1367622 -0.1398413 1619.8623
## 7       region_L_AmericaYes  0.75217242 0.1743395  4.3144121 1626.2338
## 8            region_AsiaYes -0.18990030 0.1200960 -1.5812372 1625.9589
## 9          region_EuropeYes -0.18954087 0.1256447 -1.5085464 1621.4527
## 10        region_OceaniaYes  0.06774484 0.2329078  0.2908654 1626.4096
##        p.value
## 1  0.475809928
## 2  0.677131273
## 3  0.182301346
## 4  0.007820362
## 5  0.117135643
## 6  0.888802739
## 7  0.000016966
## 8  0.114018298
## 9  0.131609552
## 10 0.771191323
## 
## $prospective
##                        term    estimate std.error  statistic        df
## 1               (Intercept)  0.56771304 0.2341241  2.4248376 1543.6665
## 2                 covidTRUE -0.02961938 0.1161146 -0.2550874 1626.7412
## 3  sponsor_typeInvestigator  0.38610170 0.2880821  1.3402487  475.9444
## 4  sponsor_typeNon industry -0.32474564 0.1477735 -2.1975899 1465.0033
## 5          region_AfricaYes  0.59391237 0.2847599  2.0856602 1626.8380
## 6       region_N_AmericaYes  0.87886175 0.1845045  4.7633619 1618.0177
## 7       region_L_AmericaYes -0.27258522 0.2136349 -1.2759393 1624.6454
## 8            region_AsiaYes  0.25516163 0.1703336  1.4980107 1623.9924
## 9          region_EuropeYes  0.52142650 0.1713305  3.0433961 1620.9812
## 10        region_OceaniaYes  2.66495323 0.7242927  3.6793870 1626.8065
##         p.value
## 1  1.542936e-02
## 2  7.986878e-01
## 3  1.808040e-01
## 4  2.813438e-02
## 5  3.716492e-02
## 6  2.074215e-06
## 7  2.021594e-01
## 8  1.343248e-01
## 9  2.376853e-03
## 10 2.414117e-04
# Pooled coefficient tables, one per outcome.
sum_main_sens_mice <- lapply(
  main_sens_models_mice,
  function(x) summary(x$pooled_fit)
)

# OR and 95% CI (Bonferroni corrected) for the covid term.
# The interval is estimate +/- z * std.error on the log-odds scale and is then
# exponentiated; `z` is defined earlier in the file (presumably the
# Bonferroni-corrected normal quantile -- confirm).
# The row and the columns are selected by name instead of by position, so a
# change in term order or in the column layout of the pooled summary stops with
# an error rather than silently reading the wrong cell.
pool_OR_main_sens_mice <- lapply(sum_main_sens_mice, function(x) {
  covid_row <- which(x$term == "covidTRUE")
  stopifnot(length(covid_row) == 1L)
  log_or <- x[covid_row, "estimate"]
  se <- x[covid_row, "std.error"]
  # Unnamed 1 x 4 matrix: OR, lower limit, upper limit, p-value.
  matrix(
    c(exp(c(log_or, log_or - z * se, log_or + z * se)), x[covid_row, "p.value"]),
    nrow = 1
  )
})

# Bind once, print the table, then tag it with the analysis label used in the
# overall results table.
pool_OR_main_sens_mice <- do.call(rbind.data.frame, pool_OR_main_sens_mice)
pool_OR_main_sens_mice
pool_OR_main_sens_mice$Analysis <- "Main total sensitivity (9)"
# Analysis 7 repeated with the region indicators added to the adjustment set.
# One pooled logistic regression per outcome in `myvars`, fitted on every
# imputed dataset in `indication_mice`; the list is named by outcome.
# NOTE(review): this reuses `region_sens_adjustment`, which was built from
# main_total_adjustment rather than indication_total_adjustment -- confirm the
# two total-effect adjustment sets are meant to be the same.
# NOTE(review): the formula is built inside the with() call; presumably that
# matters for where glm() looks up the model variables, so it is kept as is.
indication_sens_models_mice <- setNames(
  lapply(myvars, function(x) {
    fit <- with(
      indication_mice,
      glm(as.formula(paste(x, "~", paste(region_sens_adjustment, collapse = "+"))),
          family = binomial(link = "logit"))
    )
    list(fit = fit, pooled_fit = mice::pool(fit))
  }),
  myvars
)

# Print the pooled coefficient table of every outcome.
lapply(indication_sens_models_mice, function(x) summary(x$pooled_fit))
## $control_arm
##                        term    estimate std.error   statistic       df
## 1               (Intercept)  1.81275402 0.2882392  6.28906031 1630.035
## 2                 covidTRUE -0.07999277 0.1546623 -0.51720917 1632.837
## 3  sponsor_typeInvestigator -0.10897752 0.3524012 -0.30924276 1580.015
## 4  sponsor_typeNon industry  0.00509626 0.1887061  0.02700633 1627.869
## 5          region_AfricaYes  0.90737149 0.4311709  2.10443568 1632.837
## 6       region_N_AmericaYes -0.04506925 0.2214176 -0.20354860 1632.618
## 7       region_L_AmericaYes  0.28267199 0.2926553  0.96588706 1632.837
## 8            region_AsiaYes  0.21874074 0.2091697  1.04575728 1632.767
## 9          region_EuropeYes  0.15527358 0.2115949  0.73382475 1632.717
## 10        region_OceaniaYes  0.40407508 0.4396095  0.91916817 1632.837
##         p.value
## 1  4.092813e-10
## 2  6.050802e-01
## 3  7.571776e-01
## 4  9.784580e-01
## 5  3.549297e-02
## 6  8.387316e-01
## 7  3.342437e-01
## 8  2.958279e-01
## 9  4.631609e-01
## 10 3.581434e-01
## 
## $randomisation
##                        term    estimate std.error  statistic       df
## 1               (Intercept)  1.53340337 0.2610196  5.8746684 1630.579
## 2                 covidTRUE -0.04529754 0.1328551 -0.3409544 1632.837
## 3  sponsor_typeInvestigator -0.08131805 0.3212962 -0.2530937 1594.090
## 4  sponsor_typeNon industry -0.22883974 0.1669144 -1.3710004 1628.672
## 5          region_AfricaYes  1.15845946 0.4012314  2.8872602 1632.837
## 6       region_N_AmericaYes -0.24843756 0.1973063 -1.2591469 1632.676
## 7       region_L_AmericaYes  0.48041701 0.2693637  1.7835254 1632.837
## 8            region_AsiaYes  0.20275768 0.1882694  1.0769550 1632.779
## 9          region_EuropeYes  0.21390278 0.1904730  1.1230082 1632.759
## 10        region_OceaniaYes  0.47610945 0.3874572  1.2288052 1632.837
##         p.value
## 1  5.125650e-09
## 2  7.331818e-01
## 3  8.002284e-01
## 4  1.705637e-01
## 5  3.937275e-03
## 6  2.081573e-01
## 7  7.468650e-02
## 8  2.816595e-01
## 9  2.615991e-01
## 10 2.193219e-01
## 
## $blinding
##                        term     estimate std.error   statistic       df
## 1               (Intercept)  1.039578786 0.1923261  5.40529152 1629.438
## 2                 covidTRUE -0.656840267 0.1050519 -6.25253037 1632.837
## 3  sponsor_typeInvestigator -0.309842999 0.2486467 -1.24611742 1622.256
## 4  sponsor_typeNon industry -0.806035155 0.1325098 -6.08283241 1624.866
## 5          region_AfricaYes  0.474082324 0.2216708  2.13867695 1632.837
## 6       region_N_AmericaYes -0.034358864 0.1438686 -0.23882122 1632.318
## 7       region_L_AmericaYes  0.563505302 0.1894365  2.97463957 1632.837
## 8            region_AsiaYes -0.232913209 0.1306805 -1.78231029 1632.837
## 9          region_EuropeYes -0.003958487 0.1343984 -0.02945337 1632.546
## 10        region_OceaniaYes  0.380242106 0.2762278  1.37655274 1632.837
##         p.value
## 1  7.429188e-08
## 2  5.144472e-10
## 3  2.129012e-01
## 4  1.469325e-09
## 5  3.260984e-02
## 6  8.112742e-01
## 7  2.976381e-03
## 8  7.488441e-02
## 9  9.765066e-01
## 10 1.688393e-01
## 
## $prospective
##                        term    estimate std.error  statistic       df
## 1               (Intercept)  0.22922086 0.2093076  1.0951389 1631.216
## 2                 covidTRUE  0.25804047 0.1105096  2.3350057 1632.837
## 3  sponsor_typeInvestigator  0.49507774 0.2896269  1.7093637 1628.929
## 4  sponsor_typeNon industry -0.15187081 0.1376295 -1.1034756 1628.710
## 5          region_AfricaYes  0.66931008 0.2539976  2.6351035 1632.837
## 6       region_N_AmericaYes  0.86141832 0.1685819  5.1097903 1632.729
## 7       region_L_AmericaYes -0.20001925 0.1995483 -1.0023603 1632.837
## 8            region_AsiaYes  0.03818506 0.1505652  0.2536115 1632.829
## 9          region_EuropeYes  0.60893932 0.1564052  3.8933433 1632.800
## 10        region_OceaniaYes  1.27487627 0.3719383  3.4276553 1632.837
##         p.value
## 1  2.736175e-01
## 2  1.966375e-02
## 3  8.757407e-02
## 4  2.699837e-01
## 5  8.490747e-03
## 6  3.604149e-07
## 7  3.163181e-01
## 8  7.998277e-01
## 9  1.028542e-04
## 10 6.239365e-04
# Pooled coefficient tables, one per outcome.
sum_indication_sens_mice <- lapply(
  indication_sens_models_mice,
  function(x) summary(x$pooled_fit)
)

# OR and 95% CI (Bonferroni corrected) for the covid term.
# The interval is estimate +/- z * std.error on the log-odds scale and is then
# exponentiated; `z` is defined earlier in the file (presumably the
# Bonferroni-corrected normal quantile -- confirm).
# The row and the columns are selected by name instead of by position, so a
# change in term order or in the column layout of the pooled summary stops with
# an error rather than silently reading the wrong cell.
pool_OR_indication_sens_mice <- lapply(sum_indication_sens_mice, function(x) {
  covid_row <- which(x$term == "covidTRUE")
  stopifnot(length(covid_row) == 1L)
  log_or <- x[covid_row, "estimate"]
  se <- x[covid_row, "std.error"]
  # Unnamed 1 x 4 matrix: OR, lower limit, upper limit, p-value.
  matrix(
    c(exp(c(log_or, log_or - z * se, log_or + z * se)), x[covid_row, "p.value"]),
    nrow = 1
  )
})

# One row per outcome, tagged with the analysis label used in the overall
# results table.
pool_OR_indication_sens_mice <- do.call(rbind.data.frame, pool_OR_indication_sens_mice)
pool_OR_indication_sens_mice$Analysis <- "IM total sensitivity (9)"

7.9 Analysis 10

Analysis without inferring outcomes.

Create datasets.

# Analysis 10 datasets, main comparison: one dataset per outcome, keeping only
# the rows where that outcome is not missing. For randomisation the rows coded
# "Not applicable" are removed as well.
main_dataset_2_control <- main_dataset_2[
  with(main_dataset_2, !is.na(control_arm)),
]
main_dataset_2_randomisation <- main_dataset_2[
  with(main_dataset_2, !(is.na(randomisation) | randomisation == "Not applicable")),
]
main_dataset_2_blinding <- main_dataset_2[
  with(main_dataset_2, !is.na(blinding)),
]


# The same three datasets for the indication-matched comparison.
indication_dataset_2_control <- indication_dataset_2[
  with(indication_dataset_2, !is.na(control_arm)),
]
indication_dataset_2_randomisation <- indication_dataset_2[
  with(indication_dataset_2, !(is.na(randomisation) | randomisation == "Not applicable")),
]
indication_dataset_2_blinding <- indication_dataset_2[
  with(indication_dataset_2, !is.na(blinding)),
]

# Sanity check of the filtering: outcome counts in the unfiltered
# indication-matched data first, then in the three filtered datasets.
# useNA = "a" always adds the <NA> column, even when its count is 0.
table(indication_dataset_2$control_arm, useNA = "a")
## 
##   No  Yes <NA> 
##  203 1453    1
table(indication_dataset_2$randomisation, useNA = "a")
## 
##             No Not applicable            Yes           <NA> 
##             86            203           1362              6
table(indication_dataset_2$blinding, useNA = "a")
## 
##   No  Yes <NA> 
##  720  881   56
# After filtering: no missing outcomes remain and "Not applicable" has count 0
# (the level itself is still present in the randomisation factor).
table(indication_dataset_2_control$control_arm, useNA = "a")
## 
##   No  Yes <NA> 
##  203 1453    0
table(indication_dataset_2_randomisation$randomisation, useNA = "a")
## 
##             No Not applicable            Yes           <NA> 
##             86              0           1362              0
table(indication_dataset_2_blinding$blinding, useNA = "a")
## 
##   No  Yes <NA> 
##  720  881    0
# nothing to do for prospective

Main direct adjustment with only non-inferred outcomes.

# Analysis 10, main comparison, direct-effect adjustment set: logistic
# regressions for control arm [[1]] and randomisation [[2]], each fitted to the
# dataset filtered for that outcome.
main_direct_models_10 <- list()

main_direct_models_10[[1]] <- glm(
  reformulate(main_direct_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = main_dataset_2_control
)

# source registry needs to be grouped further because it created convergence
# issues: keep the 5 most frequent registries and lump the rest together
main_dataset_2_randomisation <- main_dataset_2_randomisation %>%
  mutate(source_registry = fct_lump_n(source_registry, n = 5))

main_direct_models_10[[2]] <- glm(
  reformulate(main_direct_adjustment, response = "randomisation"),
  family = binomial(link = "logit"),
  data = main_dataset_2_randomisation
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Analysis 10, main comparison, direct-effect adjustment set: blinding model.
main_direct_models_10[[3]] <- glm(
  reformulate(main_direct_adjustment, response = "blinding"),
  family = binomial(link = "logit"),
  data = main_dataset_2_blinding
)

# All coefficients of the three models.
sjPlot::plot_models(main_direct_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

coef_names <- names(coef(main_direct_models_10[[1]]))

# Same plot with every term after the first two coefficients of the control
# arm model removed. utils::tail(, -2) is used instead of 3:length(coef_names)
# because the latter counts backwards when there are fewer than 3 coefficients.
sjPlot::plot_models(
  main_direct_models_10,
  prefix.labels = "varname",
  rm.terms = utils::tail(coef_names, -2)
) +
  theme_bw() +
  geom_hline(yintercept = 1)

Indication matched direct adjustment with only non-inferred outcomes.

# Analysis 10, indication-matched comparison, direct-effect adjustment set:
# logistic regressions for control arm [[1]] and randomisation [[2]], each
# fitted to the dataset filtered for that outcome.
indication_direct_models_10 <- list()

indication_direct_models_10[[1]] <- glm(
  reformulate(indication_direct_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_control
)

indication_direct_models_10[[2]] <- glm(
  reformulate(indication_direct_adjustment, response = "randomisation"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_randomisation
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Analysis 10, indication-matched comparison, direct-effect adjustment set:
# blinding model.
indication_direct_models_10[[3]] <- glm(
  reformulate(indication_direct_adjustment, response = "blinding"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_blinding
)

# All coefficients of the three models.
sjPlot::plot_models(indication_direct_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

coef_names <- names(coef(indication_direct_models_10[[1]]))

# Same plot with every term after the first two coefficients of the control
# arm model removed. utils::tail(, -2) is used instead of 3:length(coef_names)
# because the latter counts backwards when there are fewer than 3 coefficients.
sjPlot::plot_models(
  indication_direct_models_10,
  prefix.labels = "varname",
  rm.terms = utils::tail(coef_names, -2)
) +
  theme_bw() +
  geom_hline(yintercept = 1)

Main total adjustment with only non-inferred outcomes.

# Analysis 10, main comparison, total-effect adjustment set: logistic
# regressions for control arm [[1]], randomisation [[2]] and blinding [[3]],
# each fitted to the dataset filtered for that outcome.
main_total_models_10 <- list()

main_total_models_10[[1]] <- glm(
  reformulate(main_total_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = main_dataset_2_control
)

main_total_models_10[[2]] <- glm(
  reformulate(main_total_adjustment, response = "randomisation"),
  family = binomial(link = "logit"),
  data = main_dataset_2_randomisation
)

main_total_models_10[[3]] <- glm(
  reformulate(main_total_adjustment, response = "blinding"),
  family = binomial(link = "logit"),
  data = main_dataset_2_blinding
)

sjPlot::plot_models(main_total_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

Indication matched total adjustment with only non-inferred outcomes.

# Analysis 10, indication-matched comparison, total-effect adjustment set:
# logistic regressions for control arm [[1]], randomisation [[2]] and
# blinding [[3]], each fitted to the dataset filtered for that outcome.
indication_total_models_10 <- list()

indication_total_models_10[[1]] <- glm(
  reformulate(indication_total_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_control
)

indication_total_models_10[[2]] <- glm(
  reformulate(indication_total_adjustment, response = "randomisation"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_randomisation
)

indication_total_models_10[[3]] <- glm(
  reformulate(indication_total_adjustment, response = "blinding"),
  family = binomial(link = "logit"),
  data = indication_dataset_2_blinding
)

sjPlot::plot_models(indication_total_models_10, prefix.labels = "varname") +
  theme_bw() +
  geom_hline(yintercept = 1)

7.10 Overall results

# Give the main direct table the same column names as the indication-matched
# direct table before the tables are stacked.
names(pool_OR_main_direct_mice) <- names(pool_OR_indication_direct_mice)

# Stack the six pooled odds-ratio tables and name the columns.
result <- setNames(
  bind_rows(
    pool_OR_main_direct_mice,
    pool_OR_indication_direct_mice,
    pool_OR_main_total_mice,
    pool_OR_indication_total_mice,
    pool_OR_main_sens_mice,
    pool_OR_indication_sens_mice
  ),
  c("Estimate", "Lower.CI", "Upper.CI", "P-Value", "Analysis")
)

# Outcome labels: the same four outcomes, in this order, within each of the six
# analyses.
result$Outcome <- rep(
  c("Control arm", "Randomisation", "Blinding", "Prospective"),
  times = 6
)

result <- select(result, Analysis, Outcome, everything())
rownames(result) <- NULL

# Display copy: columns 3 to 6 (estimate, interval limits, p-value) rounded to
# 3 significant digits, plus a formatted "estimate [lower-upper]" column.
result_t <- result
result_t[3:6] <- lapply(result[3:6], signif, digits = 3)
result_t$x <- with(
  result_t,
  paste0(Estimate, " [", Lower.CI, "-", Upper.CI, "]")
)

result_t
# Plot data: fix the facet order of the analyses and the axis order of the
# outcomes.
x <- result %>%
  mutate(
    Analysis = factor(
      Analysis,
      levels = c(
        "Main direct (4)",
        "IM direct (5)",
        "Main total (6)",
        "IM total (7)",
        "Main total sensitivity (9)",
        "IM total sensitivity (9)"
      )
    ),
    Outcome = factor(
      Outcome,
      levels = c("Prospective", "Blinding", "Randomisation", "Control arm")
    )
  )

# Odds ratios with their interval per outcome, drawn horizontally, with a
# dashed reference line at an odds ratio of 1.
p <- ggplot(x, aes(Outcome, Estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(y = "Odds ratio") +
  coord_flip()

# One panel per analysis, stacked in a single column.
p + facet_wrap(~Analysis, ncol = 1) + theme_light()

7.10.1 Analysis 10 summary results

# All twelve analysis 10 models in one list: three outcomes for each of the
# four analyses, in the order used for the labels further down.
all_models_10 <- c(main_direct_models_10, indication_direct_models_10,
                   main_total_models_10, indication_total_models_10)

names(all_models_10) <- rep(c("control_arm", "randomisation", "blinding"), 4)

# Coefficient matrix of every model.
sum_10 <- lapply(all_models_10, function(x) summary(x)[["coefficients"]])

# Odds ratio for the covid term with its interval and p-value. The interval is
# estimate +/- z * std. error on the log-odds scale, exponentiated; `z` is
# defined earlier in the file (presumably the Bonferroni-corrected normal
# quantile -- confirm).
# The row and the columns are selected by name instead of by position, so a
# model whose second coefficient is not the covid term stops with an error
# rather than silently contributing the wrong estimate.
pool_OR_all_10 <- lapply(sum_10, function(x) {
  stopifnot("covidTRUE" %in% rownames(x))
  log_or <- x["covidTRUE", "Estimate"]
  se <- x["covidTRUE", "Std. Error"]
  # Unnamed 1 x 4 matrix: OR, lower limit, upper limit, p-value.
  matrix(
    c(exp(c(log_or, log_or - z * se, log_or + z * se)),
      x["covidTRUE", "Pr(>|z|)"]),
    nrow = 1
  )
})

result_10 <- do.call(rbind.data.frame, pool_OR_all_10)

colnames(result_10) <- c("Estimate", "Lower.CI", "Upper.CI", "P-Value")
result_10$Outcome <- rep(c("Control arm", "Randomisation", "Blinding"), 4)
result_10$Analysis <- c(rep("Main direct (4)", 3),
                        rep("IM direct (5)", 3),
                        rep("Main total (6)", 3),
                        rep("IM total (7)", 3))

result_10 <- result_10 %>%
  select(Analysis, Outcome, everything())

rownames(result_10) <- NULL

# Display copy: columns 3 to 6 (estimate, interval limits, p-value) rounded to
# 3 significant digits, plus a formatted "estimate [lower-upper]" column.
result_10_t <- result_10
result_10_t[3:6] <- lapply(result_10[3:6], signif, digits = 3)

result_10_t$x <- paste0(result_10_t$Estimate, " [",
                        result_10_t$Lower.CI, "-", result_10_t$Upper.CI, "]")

result_10_t
# Plot data: fix the facet order of the analyses and the axis order of the
# outcomes.
x <- result_10 %>%
  mutate(
    Analysis = factor(
      Analysis,
      levels = c(
        "Main direct (4)",
        "IM direct (5)",
        "Main total (6)",
        "IM total (7)"
      )
    ),
    Outcome = factor(
      Outcome,
      levels = c("Blinding", "Randomisation", "Control arm")
    )
  )

# Odds ratios with their interval per outcome, drawn horizontally, with a
# dashed reference line at an odds ratio of 1.
p <- ggplot(x, aes(Outcome, Estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(y = "Odds ratio") +
  coord_flip()

# One panel per analysis, stacked in a single column.
p + facet_wrap(~Analysis, ncol = 1) + theme_light()

8 Exploratory analysis

The code used to define the adjustment sets is given in a separate script.

• Control arm: sample size, sponsor type, region, phase, intervention type, and primary purpose.
• Randomisation: sample size, sponsor type, region, control arm, multicentre, vaccine (i.e. a particular intervention type), and primary purpose.
• Blinding: sample size, sponsor type, randomisation, region, control arm, phase, intervention type, and primary purpose.
• Prospective registration: sample size, sponsor type, region, phase, and conventional (i.e. a particular intervention type).

# Region indicators shared by all four exploratory adjustment sets. Keeping
# them in one vector removes the copy-pasted lists, each of which named
# "region_N_America" twice. A repeated term is collapsed by the model formula,
# so the fitted models are unchanged.
# NOTE(review): region_Asia and region_Europe are not in these sets. The
# repeated "region_N_America" may have been meant to be one of them -- confirm
# against the adjustment-set script before adding, because the tables reported
# below were produced without them.
exploratory_regions <- c("region_Africa", "region_N_America",
                         "region_L_America", "region_Oceania")

# Covariates for each exploratory model; "covid" is the exposure of interest.
control_adjustment <- c("covid",
                        "sample_size", "sponsor_type",
                        exploratory_regions,
                        "phase_clean",
                        "vaccine", "conventional", "traditional",
                        "primary_purpose")

randomisation_adjustment <- c("covid",
                              "sample_size", "sponsor_type",
                              exploratory_regions,
                              #"control_arm", leads to issues with convergence
                              "multicentre",
                              "vaccine", "primary_purpose")

blinding_adjustment <- c("covid",
                         "sample_size", "sponsor_type",
                         "randomisation",
                         exploratory_regions,
                         # "control_arm", leads to issues with convergence
                         "phase_clean",
                         "vaccine", "conventional", "traditional",
                         "primary_purpose")

prospective_adjustment <- c("covid",
                            "sample_size", "sponsor_type",
                            exploratory_regions,
                            "phase_clean",
                            "conventional")

Complete cases are used for the exploratory analysis.

# Exploratory analysis data: columns study_arm:traditional for the "covid" and
# "main" study arms, restricted to rows with no missing value in any of them.
d1 <- d %>%
  select(study_arm:traditional) %>%
  filter(study_arm %in% c("covid", "main"))
d1 <- d1[complete.cases(d1), ]

# Exposure indicator coded "Yes"/"No".
d1$covid <- ifelse(d1$study_arm == "covid", "Yes", "No")

# Exploratory logistic regression for control arm.
control_exp <- glm(
  reformulate(control_adjustment, response = "control_arm"),
  family = binomial(link = "logit"),
  data = d1
)

# Coefficient plot with 1 - 0.0125 confidence level, then the default
# regression table.
sjPlot::plot_model(control_exp, ci.lvl = 1 - 0.0125)

sjPlot::tab_model(control_exp)
  Control arm
Predictors Odds Ratios CI p
(Intercept) 0.14 0.03 – 0.59 0.011
covidYes 2.18 1.60 – 2.97 <0.001
Sample size 2.13 1.83 – 2.49 <0.001
Sponsor type:
Investigator
0.81 0.44 – 1.55 0.516
Sponsor type: Non
industry
1.06 0.75 – 1.49 0.739
Africa: Yes 1.75 0.84 – 4.13 0.162
North America: Yes 0.58 0.41 – 0.81 0.001
Latin America: Yes 1.03 0.61 – 1.81 0.907
Oceania: Yes 0.65 0.35 – 1.23 0.172
Phase: Phase 2 0.72 0.47 – 1.10 0.131
Phase: Phase 3 1.66 0.97 – 2.85 0.065
Phase: Phase 4 1.02 0.55 – 1.91 0.958
Phase: Undefined 0.76 0.44 – 1.32 0.331
Vaccine: Yes 0.76 0.20 – 3.24 0.688
Conventional: Yes 2.11 0.63 – 8.94 0.261
Traditional: Yes 4.37 1.40 – 18.37 0.022
Primary purpose:
Prevention
0.86 0.39 – 1.95 0.720
Primary purpose:
Treatment
0.44 0.25 – 0.76 0.004
Observations 1539
R2 Tjur 0.188
# Exploratory logistic regression of randomisation on the covariates named in
# randomisation_adjustment, fitted on d1.
randomisation_exp <- glm(
  formula = as.formula(
    paste(
      "randomisation",
      paste(randomisation_adjustment, collapse = "+"),
      sep = "~"
    )
  ),
  family = binomial(link = "logit"),
  data = d1
)

# Coefficient plot with interval level 1 - 0.0125
sjPlot::plot_model(randomisation_exp, ci.lvl = 1 - 0.0125)

# Table of odds ratios
sjPlot::tab_model(randomisation_exp)
  randomisation
Predictors Odds Ratios CI p
(Intercept) 0.12 0.06 – 0.24 <0.001
covidYes 1.81 1.38 – 2.39 <0.001
Sample size 2.46 2.14 – 2.86 <0.001
Sponsor type:
Investigator
0.82 0.46 – 1.51 0.517
Sponsor type: Non
industry
0.94 0.67 – 1.30 0.693
Africa: Yes 1.29 0.69 – 2.57 0.445
North America: Yes 0.46 0.33 – 0.63 <0.001
Latin America: Yes 1.50 0.91 – 2.57 0.121
Oceania: Yes 0.70 0.39 – 1.27 0.231
Multicentre: Yes 0.71 0.51 – 0.97 0.032
Vaccine: Yes 0.25 0.12 – 0.53 <0.001
Primary purpose:
Prevention
0.78 0.39 – 1.55 0.473
Primary purpose:
Treatment
0.62 0.38 – 1.00 0.057
Observations 1539
R2 Tjur 0.203
# Exploratory logistic regression of blinding on the covariates named in
# blinding_adjustment, fitted on d1.
blinding_exp <- glm(
  formula = as.formula(
    paste("blinding", paste(blinding_adjustment, collapse = "+"), sep = "~")
  ),
  family = binomial(link = "logit"),
  data = d1
)

# Coefficient plot with interval level 1 - 0.0125
sjPlot::plot_model(blinding_exp, ci.lvl = 1 - 0.0125)

# Table of odds ratios
sjPlot::tab_model(blinding_exp)
  Blinding
Predictors Odds Ratios CI p
(Intercept) 0.01 0.00 – 0.04 <0.001
covidYes 0.57 0.44 – 0.75 <0.001
Sample size 0.96 0.85 – 1.07 0.429
Sponsor type:
Investigator
1.00 0.55 – 1.84 0.999
Sponsor type: Non
industry
0.61 0.44 – 0.84 0.002
randomisation: Yes 219.09 89.48 – 728.42 <0.001
Africa: Yes 1.00 0.60 – 1.66 0.986
North America: Yes 1.52 1.09 – 2.14 0.014
Latin America: Yes 1.71 1.12 – 2.65 0.015
Oceania: Yes 0.86 0.47 – 1.58 0.608
Phase: Phase 2 1.44 0.91 – 2.27 0.116
Phase: Phase 3 1.27 0.80 – 2.01 0.313
Phase: Phase 4 0.75 0.43 – 1.28 0.290
Phase: Undefined 1.67 0.99 – 2.83 0.056
Vaccine: Yes 2.51 0.85 – 7.88 0.104
Conventional: Yes 1.03 0.49 – 2.23 0.931
Traditional: Yes 1.11 0.57 – 2.20 0.758
Primary purpose:
Prevention
2.57 1.38 – 4.83 0.003
Primary purpose:
Treatment
0.94 0.58 – 1.51 0.805
Observations 1539
R2 Tjur 0.326
# Exploratory logistic regression of prospective registration on the
# covariates named in prospective_adjustment, fitted on d1.
prospective_exp <- glm(
  formula = as.formula(
    paste(
      "prospective",
      paste(prospective_adjustment, collapse = "+"),
      sep = "~"
    )
  ),
  family = binomial(link = "logit"),
  data = d1
)

# Coefficient plot with interval level 1 - 0.0125
sjPlot::plot_model(prospective_exp, ci.lvl = 1 - 0.0125)

# Table of odds ratios
sjPlot::tab_model(prospective_exp)
  Prospective registration
Predictors Odds Ratios CI p
(Intercept) 1.94 1.04 – 3.67 0.039
covidYes 0.81 0.64 – 1.04 0.099
Sample size 1.16 1.05 – 1.28 0.003
Sponsor type:
Investigator
1.51 0.83 – 2.91 0.192
Sponsor type: Non
industry
0.58 0.43 – 0.78 <0.001
Africa: Yes 1.58 0.94 – 2.78 0.095
North America: Yes 1.87 1.36 – 2.57 <0.001
Latin America: Yes 0.67 0.45 – 1.01 0.054
Oceania: Yes 15.02 4.62 – 92.27 <0.001
Phase: Phase 2 1.62 1.09 – 2.38 0.016
Phase: Phase 3 0.99 0.65 – 1.48 0.945
Phase: Phase 4 1.35 0.82 – 2.22 0.239
Phase: Undefined 1.18 0.75 – 1.85 0.467
Conventional: Yes 0.64 0.44 – 0.92 0.018
Observations 1539
R2 Tjur 0.083

9 Outcome neutral criterion 4

Used to draw a random sample of 30 trials per arm for manual checking.

# Fix the RNG state so the same rows are drawn on every run. The three draws
# below must stay in this order (covid, main, im) to reproduce the sample.
set.seed(5)

# 30 random rows from each study arm
cov_30 <- sample_n(filter(main_dataset, study_arm == "covid"), size = 30)
main_30 <- sample_n(filter(main_dataset, study_arm == "main"), size = 30)
im_30 <- sample_n(filter(indication_dataset, study_arm == "im"), size = 30)

# Stack the three samples (90 rows) and write them out for checking
check_90 <- bind_rows(cov_30, main_30, im_30)
write_csv(check_90, file = "data/check/final_check.csv")